12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
76278627962806281628262836284628562866287628862896290629162926293629462956296629762986299630063016302630363046305630663076308630963106311631263136314631563166317631863196320632163226323632463256326632763286329633063316332633363346335633663376338633963406341634263436344634563466347634863496350635163526353635463556356635763586359636063616362636363646365636663676368636963706371637263736374637563766377637863796380638163826383638463856386638763886389639063916392639363946395639663976398639964006401640264036404640564066407640864096410641164126413641464156416641764186419642064216422642364246425642664276428642964306431643264336434643564366437643864396440644164426443644464456446644764486449645064516452645364546455645664576458645964606461646264636464646564666467646864696470647164726473647464756476647764786479648064816482648364846485648664876488648964906491649264936494649564966497649864996500650165026503650465056506650765086509651065116512651365146515651665176518651965206521652265236524652565266527652865296530653165326533653465356536653765386539654065416542654365446545654665476548654965506551655265536554655565566557655865596560656165626563656465656566656765686569657065716572657365746575657665776578657965806581658265836584658565866587658865896590659165926593659465956596659765986599660066016602660366046605660666076608660966106611661266136614661566166617661866196620662166226623662466256626662766286629663066316632663366346635663666376638663966406641664266436644664566466647664866496650665166526653665466556656665766586659666066616662666366646665666666676668666966706671667266736674667566766677667866796680668166826683668466856686668766886689669066916692669366946695669666976698669967006701670267036704670567066707670867096710671167126713671467156716671767186719672067216722672367246725672667276728672967306731673267336734673567366737673867396740674167426743674467456746674767486749675067516752675367546755675667576758675967606761676267636764676567666767676867696770677167726773677467756776677
76778677967806781678267836784678567866787678867896790679167926793679467956796679767986799680068016802680368046805680668076808680968106811681268136814681568166817681868196820682168226823682468256826682768286829683068316832683368346835683668376838683968406841684268436844684568466847684868496850685168526853685468556856685768586859686068616862686368646865686668676868686968706871687268736874687568766877687868796880688168826883688468856886688768886889689068916892689368946895689668976898689969006901690269036904690569066907690869096910691169126913691469156916691769186919692069216922692369246925692669276928692969306931693269336934693569366937693869396940694169426943694469456946694769486949695069516952695369546955695669576958695969606961696269636964696569666967696869696970697169726973697469756976697769786979698069816982698369846985698669876988698969906991699269936994699569966997699869997000700170027003700470057006700770087009701070117012701370147015701670177018701970207021702270237024702570267027702870297030703170327033703470357036703770387039704070417042704370447045704670477048704970507051705270537054705570567057705870597060706170627063706470657066706770687069707070717072707370747075707670777078707970807081708270837084708570867087708870897090709170927093709470957096709770987099710071017102710371047105710671077108710971107111711271137114711571167117711871197120712171227123712471257126712771287129713071317132713371347135713671377138713971407141714271437144714571467147714871497150715171527153715471557156715771587159716071617162716371647165716671677168716971707171717271737174717571767177717871797180718171827183718471857186718771887189719071917192719371947195719671977198719972007201720272037204720572067207720872097210721172127213721472157216721772187219722072217222722372247225722672277228722972307231723272337234723572367237723872397240724172427243724472457246724772487249725072517252725372547255725672577258725972607261726272637264726572667267726872697270727172727273727472757276727
77278727972807281728272837284728572867287728872897290729172927293729472957296729772987299730073017302730373047305730673077308730973107311731273137314731573167317731873197320732173227323732473257326732773287329733073317332733373347335733673377338733973407341734273437344734573467347734873497350735173527353735473557356735773587359736073617362736373647365736673677368736973707371737273737374737573767377737873797380738173827383738473857386738773887389739073917392739373947395739673977398739974007401740274037404740574067407740874097410741174127413741474157416741774187419742074217422742374247425742674277428742974307431743274337434743574367437743874397440744174427443744474457446744774487449745074517452745374547455745674577458745974607461746274637464746574667467746874697470747174727473747474757476747774787479748074817482748374847485748674877488748974907491749274937494749574967497749874997500750175027503750475057506750775087509751075117512751375147515751675177518751975207521752275237524752575267527752875297530753175327533753475357536753775387539754075417542754375447545754675477548754975507551755275537554755575567557755875597560756175627563756475657566756775687569757075717572757375747575757675777578757975807581758275837584758575867587758875897590759175927593759475957596759775987599760076017602760376047605760676077608760976107611761276137614761576167617761876197620762176227623762476257626762776287629763076317632763376347635763676377638763976407641764276437644764576467647764876497650765176527653765476557656765776587659766076617662766376647665766676677668766976707671767276737674767576767677767876797680768176827683768476857686768776887689769076917692769376947695769676977698769977007701770277037704770577067707770877097710771177127713771477157716771777187719772077217722772377247725772677277728772977307731773277337734773577367737773877397740774177427743774477457746774777487749775077517752775377547755775677577758775977607761776277637764776577667767776877697770777177727773777477757776777
777787779778077817782778377847785778677877788778977907791779277937794779577967797779877997800780178027803780478057806780778087809781078117812781378147815781678177818781978207821782278237824782578267827782878297830783178327833783478357836783778387839784078417842784378447845784678477848784978507851785278537854785578567857785878597860786178627863786478657866786778687869787078717872787378747875787678777878787978807881788278837884788578867887788878897890789178927893789478957896789778987899790079017902790379047905790679077908790979107911791279137914791579167917791879197920792179227923792479257926792779287929793079317932793379347935793679377938793979407941794279437944794579467947794879497950795179527953795479557956795779587959796079617962796379647965796679677968796979707971797279737974797579767977797879797980798179827983798479857986798779887989799079917992799379947995799679977998799980008001800280038004800580068007800880098010801180128013801480158016801780188019802080218022802380248025802680278028802980308031803280338034803580368037803880398040804180428043804480458046804780488049805080518052805380548055805680578058805980608061 |
- @article{Rogers2000,
- abstract = {The generation of memory T cells is critically important for rapid clearance and neutralization of pathogens encountered previously by the immune system. We have studied the kinetics of response and Ag dose requirements for proliferation and cytokine secretion of CD4+ memory T cells to examine whether there are qualitative changes which might lead to improved immunity. TCR Tg CD4+ T cells were primed in vitro and transferred into T cell-deficient hosts. After 6 or more weeks, the persisting T cells were exclusively small resting cells with a memory phenotype: CD44high CD62L+/- CD25-. Memory CD4 T cells showed a similar pattern of response as naive cells to peptide analogues with similar Ag dose requirements for IL-2 secretion. However, memory cells (derived from both Th2 and Th1 effectors) displayed faster kinetics of cytokine secretion, cell division, and proliferation, enhanced proliferation in response to low doses of Ag or peptide analogues, and production of IL-4, IL-5, and IFN-gamma. These results suggest there is a much more efficient response of CD4 memory T cells to Ag re-exposure and that the expanded functional capacity of memory cells will promote a rapid development of effector functions, providing more rapid and effective immunity.},
- author = {Rogers, Paul R. and Dubey, Caroline and Swain, Susan L.},
- doi = {10.4049/jimmunol.164.5.2338},
- file = {:Users/ryan/Documents/Mendeley Desktop/Rogers, Dubey, Swain - 2000 - Qualitative Changes Accompany Memory T Cell Generation Faster, More Effective Responses at Lower Doses of.pdf:pdf},
- issn = {0022-1767},
- journal = {The Journal of Immunology},
- number = {5},
- pages = {2338--2346},
- title = {Qualitative Changes Accompany Memory {T} Cell Generation: Faster, More Effective Responses at Lower Doses of Antigen},
- volume = {164},
- year = {2000}
- }
- @misc{Ignatiadis2017,
- abstract = {Consider a multiple testing setup where we observe mutually independent pairs $((P_i, X_i))_{1 \leq i \leq m}$ of p-values $P_i$ and covariates $X_i$, such that $P_i \perp X_i$ under the null hypothesis. Our goal is to use the information potentially available in the covariates to increase power compared to conventional procedures that only use the $P_i$, while controlling the false discovery rate (FDR). To this end, we recently introduced independent hypothesis weighting (IHW), a weighted Benjamini-Hochberg method, in which the weights are chosen as a function of the covariate $X_i$ in a data-driven manner. We showed empirically in simulations and datasets from genomics and proteomics that IHW leads to a large power increase, while controlling the FDR. The key idea was to use hypothesis splitting to learn the weight-covariate function without overfitting. In this paper, we provide a survey of IHW and related approaches by presenting them under the lens of the two-groups model, when it is valid conditionally on a covariate. Furthermore, a slightly modified variant of IHW is proposed and shown to enjoy finite sample FDR control. The same ideas can also be applied for finite-sample control of the family-wise error rate (FWER) via IHW-Bonferroni.},
- archivePrefix = {arXiv},
- arxivId = {1701.05179},
- author = {Ignatiadis, Nikolaos and Huber, Wolfgang},
- eprint = {1701.05179},
- file = {:Users/ryan/Documents/Mendeley Desktop/Ignatiadis, Huber - 2017 - Covariate-powered weighted multiple testing with false discovery rate control.pdf:pdf},
- title = {{Covariate-powered weighted multiple testing with false discovery rate control}},
- url = {http://arxiv.org/abs/1701.05179},
- year = {2017}
- }
- @article{Chabbert2016,
- abstract = {The genome-wide study of epigenetic states requires the integrative analysis of histone modification ChIP-seq data. Here, we introduce an easy-to-use analytic framework to compare profiles of enrichment in histone modifications around classes of genomic elements, e.g. transcription start sites (TSS). Our framework is available via the user-friendly R/Bioconductor package DChIPRep. DChIPRep uses biological replicate information as well as chromatin Input data to allow for a rigorous assessment of differential enrichment. DChIPRep is available for download through the Bioconductor project at http://bioconductor.org/packages/DChIPRep.},
- author = {Chabbert, Christophe D. and Steinmetz, Lars M. and Klaus, Bernd},
- doi = {10.7717/peerj.1981},
- file = {:Users/ryan/Documents/Mendeley Desktop/Chabbert, Steinmetz, Klaus - 2016 - DChIPRep , an RBioconductor package for differential enrichment analysis in chromatin studies.pdf:pdf},
- issn = {2167-8359},
- journal = {PeerJ},
- month = apr,
- pages = {e1981},
- title = {{DChIPRep, an R/Bioconductor package for differential enrichment analysis in chromatin studies}},
- url = {https://peerj.com/articles/1981},
- volume = {4},
- year = {2016}
- }
- @article{Yang2011,
- abstract = {Alfalfa, [Medicago sativa (L.) sativa], a widely-grown perennial forage has potential for development as a cellulosic ethanol feedstock. However, the genomics of alfalfa, a non-model species, is still in its infancy. The recent advent of RNA-Seq, a massively parallel sequencing method for transcriptome analysis, provides an opportunity to expand the identification of alfalfa genes and polymorphisms, and conduct in-depth transcript profiling.},
- author = {Yang, S Samuel and Tu, Zheng Jin and Cheung, Foo and Xu, Wayne Wenzhong and Lamb, JoAnn F S and Jung, Hans-Joachim G and Vance, Carroll P and Gronwald, John W},
- doi = {10.1186/1471-2164-12-199},
- file = {:Users/ryan/Documents/Mendeley Desktop/Yang et al. - 2011 - Using RNA-Seq for gene identification, polymorphism detection and transcript profiling in two alfalfa genotypes wit.pdf:pdf},
- issn = {1471-2164},
- journal = {BMC Genomics},
- keywords = {Cell Wall,Cell Wall: metabolism,Gene Expression Profiling,Genes, Plant,Genes, Plant: genetics,Genotype,High-Throughput Nucleotide Sequencing,High-Throughput Nucleotide Sequencing: methods,Medicago sativa,Medicago sativa: cytology,Medicago sativa: genetics,Minisatellite Repeats,Minisatellite Repeats: genetics,Molecular Sequence Annotation,Plant Stems,Plant Stems: cytology,Plant Stems: genetics,Polymorphism, Single Nucleotide,Polymorphism, Single Nucleotide: genetics,RNA, Messenger,RNA, Messenger: genetics,Sequence Analysis, RNA,Sequence Analysis, RNA: methods,Titanium},
- month = jan,
- number = {1},
- pages = {199},
- pmid = {21504589},
- publisher = {BioMed Central Ltd},
- title = {{Using RNA-Seq for gene identification, polymorphism detection and transcript profiling in two alfalfa genotypes with divergent cell wall composition in stems}},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=3112146&tool=pmcentrez&rendertype=abstract},
- volume = {12},
- year = {2011}
- }
- @manual{Durinck2008,
- author = {Durinck, Steffen and Huber, Wolfgang},
- file = {:Users/ryan/Documents/Mendeley Desktop/Durinck, Huber - 2008 - The biomaRt user ' s guide.pdf:pdf},
- pages = {1--22},
- title = {{The biomaRt user's guide}},
- year = {2008}
- }
- @article{Simes1986,
- abstract = {A modification of the Bonferroni procedure for testing multiple hypotheses is presented. The method, based on the ordered p-values of the individual tests, is less conservative than the classical Bonferroni procedure but is still simple to apply. A simulation study shows that the probability of a type I error of the procedure does not exceed the nominal significance level, $\alpha$, for a variety of multivariate normal and multivariate gamma test statistics. For independent tests the procedure has type I error probability equal to $\alpha$. The method appears particularly advantageous over the classical Bonferroni procedure when several highly-correlated test statistics are involved.},
- author = {Simes, R. J.},
- doi = {10.1093/biomet/73.3.751},
- file = {:Users/ryan/Documents/Mendeley Desktop/Simes - 1986 - An improved bonferroni procedure for multiple tests of significance.pdf:pdf},
- issn = {0006-3444},
- journal = {Biometrika},
- keywords = {Bonferroni inequality,Multiple comparisons,Simultaneous test procedures},
- number = {3},
- pages = {751--754},
- title = {{An improved Bonferroni procedure for multiple tests of significance}},
- url = {https://www.researchgate.net/profile/John_Simes/publication/224817414_An_Improved_Bonferroni_Procedure_for_Multiple_Tests_of_Significance/links/0c960516f0a0edf36c000000.pdf?origin=publication_detail},
- volume = {73},
- year = {1986}
- }
- @article{Young2010,
- abstract = {We present GOseq, an application for performing Gene Ontology (GO) analysis on RNA-seq data. GO analysis is widely used to reduce complexity and highlight biological processes in genome-wide expression studies, but standard methods give biased results on RNA-seq data due to over-detection of differential expression for long and highly expressed transcripts. Application of GOseq to a prostate cancer data set shows that GOseq dramatically changes the results, highlighting categories more consistent with the known biology.},
- author = {Young, Matthew D and Wakefield, Matthew J and Smyth, Gordon K and Oshlack, Alicia},
- doi = {10.1186/gb-2010-11-2-r14},
- file = {:Users/ryan/Documents/Mendeley Desktop/Young et al. - 2010 - Gene ontology analysis for RNA-seq accounting for selection bias.pdf:pdf},
- issn = {1465-6914},
- journal = {Genome Biology},
- keywords = {Androgens,Androgens: pharmacology,Bias (Epidemiology),Cell Line, Tumor,Gene Expression Regulation, Neoplastic,Genome-Wide Association Study,Humans,Male,Prostatic Neoplasms,Prostatic Neoplasms: genetics,Sequence Analysis, RNA,Sequence Analysis, RNA: methods},
- month = jan,
- number = {2},
- pages = {R14},
- pmid = {20132535},
- title = {{Gene ontology analysis for RNA-seq: accounting for selection bias}},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=2872874&tool=pmcentrez&rendertype=abstract},
- volume = {11},
- year = {2010}
- }
- @article{Zhang2014,
- abstract = {To characterize the role of the circadian clock in mouse physiology and behavior, we used RNA-seq and DNA arrays to quantify the transcriptomes of 12 mouse organs over time. We found 43{\%} of all protein coding genes showed circadian rhythms in transcription somewhere in the body, largely in an organ-specific manner. In most organs, we noticed the expression of many oscillating genes peaked during transcriptional "rush hours" preceding dawn and dusk. Looking at the genomic landscape of rhythmic genes, we saw that they clustered together, were longer, and had more spliceforms than nonoscillating genes. Systems-level analysis revealed intricate rhythmic orchestration of gene pathways throughout the body. We also found oscillations in the expression of more than 1,000 known and novel noncoding RNAs (ncRNAs). Supporting their potential role in mediating clock function, ncRNAs conserved between mouse and human showed rhythmic expression in similar proportions as protein coding genes. Importantly, we also found that the majority of best-selling drugs and World Health Organization essential medicines directly target the products of rhythmic genes. Many of these drugs have short half-lives and may benefit from timed dosage. In sum, this study highlights critical, systemic, and surprising roles of the mammalian circadian clock and provides a blueprint for advancement in chronotherapy.},
- author = {Zhang, Ray and Lahens, Nicholas F and Ballance, Heather I and Hughes, Michael E and Hogenesch, John B},
- doi = {10.1073/pnas.1408886111},
- file = {:Users/ryan/Documents/Mendeley Desktop/Zhang et al. - 2014 - A circadian gene expression atlas in mammals implications for biology and medicine.pdf:pdf},
- issn = {1091-6490},
- journal = {Proceedings of the National Academy of Sciences of the United States of America},
- keywords = {Animals,Chronotherapy,Chronotherapy: methods,Circadian Rhythm,Circadian Rhythm: physiology,Databases, Nucleic Acid,Gene Expression Profiling,Gene Expression Profiling: methods,Gene Expression Regulation,Gene Expression Regulation: physiology,Humans,Mice,Transcriptome,Transcriptome: physiology},
- number = {45},
- pages = {16219--16224},
- pmid = {25349387},
- title = {{A circadian gene expression atlas in mammals: implications for biology and medicine}},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=4234565&tool=pmcentrez&rendertype=abstract},
- volume = {111},
- year = {2014}
- }
- @article{Smyth2004,
- abstract = {The problem of identifying differentially expressed genes in designed microarray experiments is considered. Lonnstedt and Speed (2002) derived an expression for the posterior odds of differential expression in a replicated two-color experiment using a simple hierarchical parametric model. The purpose of this paper is to develop the hierarchical model of Lonnstedt and Speed (2002) into a practical approach for general microarray experiments with arbitrary numbers of treatments and RNA samples. The model is reset in the context of general linear models with arbitrary coefficients and contrasts of interest. The approach applies equally well to both single channel and two color microarray experiments. Consistent, closed form estimators are derived for the hyperparameters in the model. The estimators proposed have robust behavior even for small numbers of arrays and allow for incomplete data arising from spot filtering or spot quality weights. The posterior odds statistic is reformulated in terms of a moderated t-statistic in which posterior residual standard deviations are used in place of ordinary standard deviations. The empirical Bayes approach is equivalent to shrinkage of the estimated sample variances towards a pooled estimate, resulting in far more stable inference when the number of arrays is small. The use of moderated t-statistics has the advantage over the posterior odds that the number of hyperparameters which need to estimated is reduced; in particular, knowledge of the non-null prior for the fold changes are not required. The moderated t-statistic is shown to follow a t-distribution with augmented degrees of freedom. The moderated t inferential approach extends to accommodate tests of composite null hypotheses through the use of moderated F-statistics. The performance of the methods is demonstrated in a simulation study. Results are presented for two publicly available data sets.},
- author = {Smyth, Gordon K},
- doi = {10.2202/1544-6115.1027},
- issn = {1544-6115},
- journal = {Statistical Applications in Genetics and Molecular Biology},
- keywords = {bayes,microarray,moderated t-statistic},
- mendeley-tags = {bayes,microarray,moderated t-statistic},
- month = jan,
- number = {1},
- pages = {1--25},
- pmid = {16646809},
- title = {{Linear Models and Empirical Bayes Methods for Assessing Differential Expression in Microarray Experiments}},
- url = {https://www.degruyter.com/view/j/sagmb.2004.3.issue-1/sagmb.2004.3.1.1027/sagmb.2004.3.1.1027.xml},
- volume = {3},
- year = {2004}
- }
- @article{Einecke2010,
- abstract = {Kidney transplant recipients that develop signs of renal dysfunction or proteinuria one or more years after transplantation are at considerable risk for progression to renal failure. To assess the kidney at this time, a "for-cause" biopsy is performed, but this provides little indication as to which recipients will go on to organ failure. In an attempt to identify molecules that could provide this information, we used microarrays to analyze gene expression in 105 for-cause biopsies taken between 1 and 31 years after transplantation. Using supervised principal components analysis, we derived a molecular classifier to predict graft loss. The genes associated with graft failure were related to tissue injury, epithelial dedifferentiation, matrix remodeling, and TGF-beta effects and showed little overlap with rejection-associated genes. We assigned a prognostic molecular risk score to each patient, identifying those at high or low risk for graft loss. The molecular risk score was correlated with interstitial fibrosis, tubular atrophy, tubulitis, interstitial inflammation, proteinuria, and glomerular filtration rate. In multivariate analysis, molecular risk score, peritubular capillary basement membrane multilayering, arteriolar hyalinosis, and proteinuria were independent predictors of graft loss. In an independent validation set, the molecular risk score was the only predictor of graft loss. Thus, the molecular risk score reflects active injury and is superior to either scarring or function in predicting graft failure.},
- author = {Einecke, Gunilla and Reeve, Jeff and Sis, Banu and Mengel, Michael and Hidalgo, Luis and Famulski, Konrad S and Matas, Arthur and Kasiske, Bert and Kaplan, Bruce and Halloran, Philip F},
- doi = {10.1172/JCI41789},
- file = {:Users/ryan/Documents/Mendeley Desktop/Einecke et al. - 2010 - A molecular classifier for predicting future graft loss in late kidney transplant biopsies.pdf:pdf},
- issn = {1558-8238},
- journal = {The Journal of Clinical Investigation},
- keywords = {Biopsy,Capillaries,Capillaries: pathology,Disease Progression,Forecasting,Glomerular Filtration Rate,Graft Rejection,Graft Rejection: diagnosis,Graft Rejection: pathology,Humans,Kidney,Kidney Transplantation,Kidney Transplantation: pathology,Kidney: pathology,Proteinuria,Proteinuria: pathology,Renal Insufficiency,Renal Insufficiency: pathology,Transforming Growth Factor beta},
- month = jun,
- number = {6},
- pages = {1862--1872},
- pmid = {20501945},
- title = {{A molecular classifier for predicting future graft loss in late kidney transplant biopsies}},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=2877953&tool=pmcentrez&rendertype=abstract},
- volume = {120},
- year = {2010}
- }
- @article{Wasserstein2016,
- author = {Wasserstein, Ronald L. and Lazar, Nicole A.},
- doi = {10.1080/00031305.2016.1154108},
- file = {:Users/ryan/Documents/Mendeley Desktop/Wasserstein, Lazar - 2016 - The ASA's statement on p-values context, process, and purpose.pdf:pdf},
- issn = {0003-1305},
- journal = {The American Statistician},
- number = {2},
- pages = {129--133},
- title = {{The ASA's statement on p-values: context, process, and purpose}},
- url = {http://www.tandfonline.com/doi/full/10.1080/00031305.2016.1154108},
- volume = {70},
- year = {2016}
- }
- @article{Hart,
- title = {{The Noisy Human Transcriptome: Implications for the Design of Efficient RNA-Seq Experiments}},
- author = {Hart, G Traver and Komori, H Kiyomi and Lamere, Sarah A and Podshivalova, Katie and Grigoriev, Yevgeniy A and Salomon, Daniel R},
- }
- @article{Leek2014,
- abstract = {It is now known that unwanted noise and unmodeled artifacts such as batch effects can dramatically reduce the accuracy of statistical inference in genomic experiments. These sources of noise must be modeled and removed to accurately measure biological variability and to obtain correct statistical inference when performing high-throughput genomic analysis. We introduced surrogate variable analysis (sva) for estimating these artifacts by (i) identifying the part of the genomic data only affected by artifacts and (ii) estimating the artifacts with principal components or singular vectors of the subset of the data matrix. The resulting estimates of artifacts can be used in subsequent analyses as adjustment factors to correct analyses. Here I describe a version of the sva approach specifically created for count data or FPKMs from sequencing experiments based on appropriate data transformation. I also describe the addition of supervised sva (ssva) for using control probes to identify the part of the genomic data only affected by artifacts. I present a comparison between these versions of sva and other methods for batch effect estimation on simulated data, real count-based data and FPKM-based data. These updates are available through the sva Bioconductor package and I have made fully reproducible analysis using these methods available from: https://github.com/jtleek/svaseq.},
- author = {Leek, Jeffrey T},
- doi = {10.1093/nar/gku864},
- file = {:Users/ryan/Documents/Mendeley Desktop/Leek - 2014 - svaseq removing batch effects and other unwanted noise from sequencing data.pdf:pdf},
- issn = {1362-4962},
- journal = {Nucleic Acids Research},
- month = dec,
- number = {21},
- pages = {e161},
- pmid = {25294822},
- title = {svaseq: removing batch effects and other unwanted noise from sequencing data},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=PMC4245966},
- volume = {42},
- year = {2014}
- }
- @article{Tian2005a,
- abstract = {Accurate and rapid identification of perturbed pathways through the analysis of genome-wide expression profiles facilitates the generation of biological hypotheses. We propose a statistical framework for determining whether a specified group of genes for a pathway has a coordinated association with a phenotype of interest. Several issues on proper hypothesis-testing procedures are clarified. In particular, it is shown that the differences in the correlation structure of each set of genes can lead to a biased comparison among gene sets unless a normalization procedure is applied. We propose statistical tests for two important but different aspects of association for each group of genes. This approach has more statistical power than currently available methods and can result in the discovery of statistically significant pathways that are not detected by other methods. This method is applied to data sets involving diabetes, inflammatory myopathies, and Alzheimer's disease, using gene sets we compiled from various public databases. In the case of inflammatory myopathies, we have correctly identified the known cytotoxic T lymphocyte-mediated autoimmunity in inclusion body myositis. Furthermore, we predicted the presence of dendritic cells in inclusion body myositis and of an IFN-alpha/beta response in dermatomyositis, neither of which was previously described. These predictions have been subsequently corroborated by immunohistochemistry.},
- author = {Tian, Lu and Greenberg, Steven A and Kong, Sek Won and Altschuler, Josiah and Kohane, Isaac S and Park, Peter J},
- doi = {10.1073/pnas.0506577102},
- file = {:Users/ryan/Documents/Mendeley Desktop/Tian et al. - 2005 - Discovering statistically significant pathways in expression profiling studies.pdf:pdf},
- issn = {0027-8424},
- journal = {Proceedings of the National Academy of Sciences of the United States of America},
- keywords = {Algorithms,Alzheimer Disease,Alzheimer Disease: genetics,Alzheimer Disease: metabolism,Animals,Autoimmunity,Autoimmunity: genetics,Autoimmunity: physiology,Databases, Genetic,Dermatomyositis,Dermatomyositis: genetics,Dermatomyositis: metabolism,Gene Expression Profiling,Gene Expression Profiling: methods,Gene Expression Regulation,Gene Expression Regulation: physiology,Humans,Interferon-alpha,Interferon-alpha: genetics,Interferon-alpha: metabolism,Interferon-beta,Interferon-beta: genetics,Interferon-beta: metabolism,Models, Genetic,Myositis,Myositis, Inclusion Body,Myositis, Inclusion Body: genetics,Myositis, Inclusion Body: metabolism,Myositis: genetics,Myositis: metabolism,Oligonucleotide Array Sequence Analysis,Oligonucleotide Array Sequence Analysis: methods,Predictive Value of Tests,T-Lymphocytes,T-Lymphocytes: metabolism,Transcription Factors,Transcription Factors: metabolism},
- month = sep,
- number = {38},
- pages = {13544--13549},
- pmid = {16174746},
- title = {{Discovering statistically significant pathways in expression profiling studies}},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=1200092&tool=pmcentrez&rendertype=abstract},
- volume = {102},
- year = {2005}
- }
- @article{Robinson2008,
- abstract = {We derive a quantile-adjusted conditional maximum likelihood estimator for the dispersion parameter of the negative binomial distribution and compare its performance, in terms of bias, to various other methods. Our estimation scheme outperforms all other methods in very small samples, typical of those from serial analysis of gene expression studies, the motivating data for this study. The impact of dispersion estimation on hypothesis testing is studied. We derive an "exact" test that outperforms the standard approximate asymptotic tests.},
- author = {Robinson, Mark D and Smyth, Gordon K},
- doi = {10.1093/biostatistics/kxm030},
- file = {:Users/ryan/Documents/Mendeley Desktop/Robinson, Smyth - 2008 - Small-sample estimation of negative binomial dispersion, with applications to SAGE data.pdf:pdf},
- issn = {1465-4644},
- journal = {Biostatistics},
- keywords = {Bias (Epidemiology),Binomial Distribution,Biometry,Biometry: methods,Expressed Sequence Tags,Gene Expression Profiling,Gene Expression Profiling: methods,Gene Expression Profiling: statistics {\&} numerical data,Gene Library,Humans,Information Storage and Retrieval,Information Storage and Retrieval: methods,Information Storage and Retrieval: statistics {\&} numerical data,Likelihood Functions,RNA, Messenger,RNA, Messenger: analysis,Regression Analysis,Research Design,Research Design: statistics {\&} numerical data,Sample Size,Stochastic Processes,Weights and Measures},
- month = apr,
- number = {2},
- pages = {321--332},
- pmid = {17728317},
- title = {{Small-sample estimation of negative binomial dispersion, with applications to SAGE data}},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/17728317},
- volume = {9},
- year = {2008}
- }
- @article{Ryan2007,
- abstract = {The ability of mesenchymal stem cells (MSC) to suppress alloresponsiveness is poorly understood. Herein, an allogeneic mixed lymphocyte response was used as a model to investigate the mechanisms of MSC-mediated immunomodulation. Human MSC are demonstrated to express the immunosuppressive cytokines hepatocyte growth factor (HGF), interleukin (IL)-10 and transforming growth factor (TGF)-$\beta$1 at concentrations that suppress alloresponses in vitro. MSC also express cyclooxygenase 1 and 2 and produce prostaglandin E2 constitutively. Blocking studies with indomethacin confirmed that prostaglandins contribute to MSC-mediated allosuppression. The proinflammatory cytokine interferon (IFN)-$\gamma$ did not ablate MSC inhibition of alloantigen-driven proliferation but up-regulated HGF and TGF-$\beta$1. IFN-$\gamma$ also induced expression of indoleamine 2,3-dioxygenase (IDO), involved in tryptophan catabolism. Use of an antagonist, 1-methyl-L-tryptophan, restored alloresponsiveness and confirmed an IDO contribution to IFN-$\gamma$-induced immunomodulation by MSC. Addition of the tryptophan catabolite kynurenine to mixed lymphocyte reactions (MLR), blocked alloproliferation. These findings support a model where IDO exerts its effect through the local accumulation of tryptophan metabolites rather than through tryptophan depletion. Taken together, these data demonstrate that soluble factors, or products derived from MSC, modulate immune responses and suggest that MSC create an immunosuppressive microenvironment capable of modulating alloresponsiveness even in the presence of IFN-$\gamma$. {\textcopyright} 2007 The Author(s).},
- author = {Ryan, J. M. and Barry, F. and Murphy, J. M. and Mahon, B. P.},
- doi = {10.1111/j.1365-2249.2007.03422.x},
- file = {:Users/ryan/Documents/Mendeley Desktop/Ryan et al. - 2007 - Interferon-$\gamma$ does not break, but promotes the immunosuppressive capacity of adult human mesenchymal stem cells.pdf:pdf},
- issn = {0009-9104},
- journal = {Clinical {\&} Experimental Immunology},
- keywords = {IFN-$\gamma$,Inflammation,Stem cells},
- month = aug,
- number = {2},
- pages = {353--363},
- title = {{Interferon-$\gamma$ does not break, but promotes the immunosuppressive capacity of adult human mesenchymal stem cells}},
- url = {http://doi.wiley.com/10.1111/j.1365-2249.2007.03422.x},
- volume = {149},
- year = {2007}
- }
- @article{Strimmer2008a,
- abstract = {SUMMARY: False discovery rate (FDR) methodologies are essential in the study of high-dimensional genomic and proteomic data. The R package 'fdrtool' facilitates such analyses by offering a comprehensive set of procedures for FDR estimation. Its distinctive features include: (i) many different types of test statistics are allowed as input data, such as P-values, z-scores, correlations and t-scores; (ii) simultaneously, both local FDR and tail area-based FDR values are estimated for all test statistics and (iii) empirical null models are fit where possible, thereby taking account of potential over- or underdispersion of the theoretical null. In addition, 'fdrtool' provides readily interpretable graphical output, and can be applied to very large scale (in the order of millions of hypotheses) multiple testing problems. Consequently, 'fdrtool' implements a flexible FDR estimation scheme that is unified across different test statistics and variants of FDR. AVAILABILITY: The program is freely available from the Comprehensive R Archive Network (http://cran.r-project.org/) under the terms of the GNU General Public License (version 3 or later). CONTACT: strimmer@uni-leipzig.de.},
- author = {Strimmer, Korbinian},
- doi = {10.1093/bioinformatics/btn209},
- file = {:Users/ryan/Documents/Mendeley Desktop/Strimmer - 2008 - fdrtool A versatile R package for estimating local and tail area-based false discovery rates.pdf:pdf},
- issn = {1367-4803},
- journal = {Bioinformatics},
- number = {12},
- pages = {1461--1462},
- pmid = {18441000},
- title = {{fdrtool: A versatile R package for estimating local and tail area-based false discovery rates}},
- url = {http://bioinformatics.oxfordjournals.org/content/24/12/1461.full.pdf},
- volume = {24},
- year = {2008}
- }
- @article{Leinonen2011,
- abstract = {The combination of significantly lower cost and increased speed of sequencing has resulted in an explosive growth of data submitted into the primary next-generation sequence data archive, the Sequence Read Archive (SRA). The preservation of experimental data is an important part of the scientific record, and increasing numbers of journals and funding agencies require that next-generation sequence data are deposited into the SRA. The SRA was established as a public repository for the next-generation sequence data and is operated by the International Nucleotide Sequence Database Collaboration (INSDC). INSDC partners include the National Center for Biotechnology Information (NCBI), the European Bioinformatics Institute (EBI) and the DNA Data Bank of Japan (DDBJ). The SRA is accessible at http://www.ncbi.nlm.nih.gov/Traces/sra from NCBI, at http://www.ebi.ac.uk/ena from EBI and at http://trace.ddbj.nig.ac.jp from DDBJ. In this article, we present the content and structure of the SRA, detail our support for sequencing platforms and provide recommended data submission levels and formats. We also briefly outline our response to the challenge of data growth. {\textcopyright} The Author(s) 2010.},
- author = {Leinonen, Rasko and Sugawara, Hideaki and Shumway, Martin},
- doi = {10.1093/nar/gkq1019},
- file = {:Users/ryan/Documents/Mendeley Desktop/Leinonen, Sugawara, Shumway - 2011 - The sequence read archive.pdf:pdf},
- issn = {0305-1048},
- journal = {Nucleic Acids Research},
- number = {SUPPL. 1},
- pages = {D19--D21},
- title = {{The sequence read archive}},
- url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3013647/pdf/gkq1019.pdf},
- volume = {39},
- year = {2011}
- }
- @article{Clark2014,
- author = {Clark, Neil R and Hu, Kevin S and Feldmann, Axel S and Kou, Yan and Chen, Edward Y and Duan, Qiaonan and Ma'ayan, Avi},
- doi = {10.1186/1471-2105-15-79},
- file = {:Users/ryan/Documents/Mendeley Desktop/Clark et al. - 2014 - The characteristic direction a geometrical approach to identify differentially expressed genes.pdf:pdf},
- issn = {1471-2105},
- journal = {BMC Bioinformatics},
- number = {1},
- pages = {79},
- title = {{The characteristic direction: a geometrical approach to identify differentially expressed genes}},
- url = {http://www.biomedcentral.com/1471-2105/15/79},
- volume = {15},
- year = {2014}
- }
- @article{Fernandes2013,
- abstract = {Experimental variance is a major challenge when dealing with high-throughput sequencing data. This variance has several sources: sampling replication, technical replication, variability within biological conditions, and variability between biological conditions. The high per-sample cost of RNA-Seq often precludes the large number of experiments needed to partition observed variance into these categories as per standard ANOVA models. We show that the partitioning of within-condition to between-condition variation cannot reasonably be ignored, whether in single-organism RNA-Seq or in Meta-RNA-Seq experiments, and further find that commonly-used RNA-Seq analysis tools, as described in the literature, do not enforce the constraint that the sum of relative expression levels must be one, and thus report expression levels that are systematically distorted. These two factors lead to misleading inferences if not properly accommodated. As it is usually only the biological between-condition and within-condition differences that are of interest, we developed ALDEx, an ANOVA-like differential expression procedure, to identify genes with greater between- to within-condition differences. We show that the presence of differential expression and the magnitude of these comparative differences can be reasonably estimated with even very small sample sizes.},
- author = {Fernandes, Andrew D and Macklaim, Jean M and Linn, Thomas G and Reid, Gregor and Gloor, Gregory B},
- doi = {10.1371/journal.pone.0067019},
- file = {:Users/ryan/Documents/Mendeley Desktop/Fernandes et al. - 2013 - ANOVA-like differential expression (ALDEx) analysis for mixed population RNA-Seq.pdf:pdf},
- issn = {1932-6203},
- journal = {PLoS ONE},
- month = jan,
- number = {7},
- pages = {e67019},
- pmid = {23843979},
- title = {{ANOVA-like differential expression (ALDEx) analysis for mixed population RNA-Seq}},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=3699591&tool=pmcentrez&rendertype=abstract},
- volume = {8},
- year = {2013}
- }
- @article{Patro2017,
- author = {Patro, Rob and Duggal, Geet and Love, Michael I and Irizarry, Rafael A and Kingsford, Carl},
- doi = {10.1038/nmeth.4197},
- file = {:Users/ryan/Documents/Mendeley Desktop/Patro et al. - 2017 - Salmon provides fast and bias-aware quantification of transcript expression.pdf:pdf},
- issn = {1548-7091},
- journal = {Nature Methods},
- month = apr,
- number = {4},
- pages = {417--419},
- publisher = {Nature Publishing Group},
- title = {{Salmon} provides fast and bias-aware quantification of transcript expression},
- url = {http://www.nature.com/articles/nmeth.4197},
- volume = {14},
- year = {2017}
- }
- @misc{Diego,
- file = {:Users/ryan/Documents/Mendeley Desktop/Diego, Cruz - Unknown - Supporting Online Material ( SOM ) Materials and Methods.pdf:pdf},
- internal-note = {Imported record listed authors "Diego, San and Cruz, Santa" (place names, presumably affiliations), volume 001 and number 2; the true authors and the parent publication still need to be identified},
- key = {Supporting Online Material},
- title = {Supporting Online Material ({SOM}) Materials and Methods}
- }
- @article{Stanke2008,
- abstract = {Computational annotation of protein coding genes in genomic DNA is a widely used and essential tool for analyzing newly sequenced genomes. However, current methods suffer from inaccuracy and do poorly with certain types of genes. Including additional sources of evidence of the existence and structure of genes can improve the quality of gene predictions. For many eukaryotic genomes, expressed sequence tags (ESTs) are available as evidence for genes. Related genomes that have been sequenced, annotated, and aligned to the target genome provide evidence of existence and structure of genes.},
- author = {Stanke, Mario and Diekhans, Mark and Baertsch, Robert and Haussler, David},
- doi = {10.1093/bioinformatics/btn013},
- file = {:Users/ryan/Documents/Mendeley Desktop/Stanke et al. - 2008 - Using native and syntenically mapped cDNA alignments to improve de novo gene finding.pdf:pdf},
- issn = {1367-4811},
- journal = {Bioinformatics},
- keywords = {Alternative Splicing,Animals,DNA, Complementary,DNA, Complementary: genetics,Expressed Sequence Tags,Humans,Sequence Alignment},
- month = mar,
- number = {5},
- pages = {637--644},
- pmid = {18218656},
- title = {Using native and syntenically mapped {cDNA} alignments to improve de novo gene finding},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/18218656},
- volume = {24},
- year = {2008}
- }
- @article{Yang2013,
- abstract = {Recently, long noncoding RNAs (lncRNAs) were found to be dysregulated in a variety of tumors. However, it remains unknown how and through what molecular mechanisms the expression of lncRNAs is controlled. In this study, we found that the lncRNA Low Expression in Tumor (lncRNA-LET) was generally downregulated in hepatocellular carcinomas, colorectal cancers, and squamous-cell lung carcinomas. We demonstrated that hypoxia-induced histone deacetylase 3 repressed lncRNA-LET by reducing the histone acetylation-mediated modulation of the lncRNA-LET promoter region. Interestingly, the downregulation of lncRNA-LET was found to be a key step in the stabilization of nuclear factor 90 protein, which leads to hypoxia-induced cancer cell invasion. Moreover, the relationship among hypoxia, histone acetylation disorder, low lncRNA-LET expression level, and metastasis was found in clinical hepatocellular carcinoma samples. These results advance our understanding of the role of lncRNA-LET as a regulator of hypoxia signaling and offer new avenues for therapeutic intervention against cancer progression. {\textcopyright} 2013 Elsevier Inc.},
- author = {Yang, Fu and Huo, Xi-song and Yuan, Sheng-xian and Zhang, Ling and Zhou, Wei-ping and Wang, Fang and Sun, Shu-han},
- doi = {10.1016/j.molcel.2013.01.010},
- file = {:Users/ryan/Documents/Mendeley Desktop/Yang et al. - 2013 - Repression of the Long Noncoding RNA-LET by Histone Deacetylase 3 Contributes to Hypoxia-Mediated Metastasis.pdf:pdf},
- issn = {1097-2765},
- journal = {Molecular Cell},
- number = {6},
- pages = {1083--1096},
- pmid = {23395002},
- publisher = {Elsevier Inc.},
- title = {Repression of the Long Noncoding {RNA-LET} by Histone Deacetylase 3 Contributes to Hypoxia-Mediated Metastasis},
- url = {http://dx.doi.org/10.1016/j.molcel.2013.01.010},
- volume = {49},
- year = {2013}
- }
- @misc{Sathe2013,
- author = {Sathe, Ganesh},
- file = {:Users/ryan/Documents/Mendeley Desktop/Sathe - 2013 - Applications of RNA-Seq within Research {\&} Development at GSK Ganesh Sathe.pdf:pdf},
- title = {Applications of {RNA-Seq} within Research {\&} Development at {GSK}},
- year = {2013}
- }
- @article{Vartanian2009,
- abstract = {Peripheral blood is an accessible and informative source of transcriptomal information for many human disease and pharmacogenomic studies. While there can be significant advantages to analyzing RNA isolated from whole blood, particularly in clinical studies, the preparation of samples for microarray analysis is complicated by the need to minimize artifacts associated with highly abundant globin RNA transcripts. The impact of globin RNA transcripts on expression profiling data can potentially be reduced by using RNA preparation and labeling methods that remove or block globin RNA during the microarray assay. We compared four different methods for preparing microarray hybridization targets from human whole blood collected in PAXGene tubes. Three of the methods utilized the Affymetrix one-cycle cDNA synthesis/in vitro transcription protocol but varied treatment of input RNA as follows: i. no treatment; ii. treatment with GLOBINclear; or iii. treatment with globin PNA oligos. In the fourth method cDNA targets were prepared with the Ovation amplification and labeling system.},
- author = {Vartanian, Kristina and Slottke, Rachel and Johnstone, Timothy and Casale, Amanda and Planck, Stephen R and Choi, Dongseok and Smith, Justine R and Rosenbaum, James T and Harrington, Christina A},
- doi = {10.1186/1471-2164-10-2},
- file = {:Users/ryan/Documents/Mendeley Desktop/Vartanian et al. - 2009 - Gene expression profiling of whole blood comparison of target preparation methods for accurate and reproducibl.pdf:pdf},
- issn = {1471-2164},
- journal = {BMC Genomics},
- keywords = {Freezing,Gene Expression Profiling,Gene Expression Profiling: methods,Globins,Globins: genetics,Humans,NF-kappa B,NF-kappa B: genetics,Oligonucleotide Array Sequence Analysis,Oligonucleotide Array Sequence Analysis: methods,RNA,RNA, Messenger,RNA, Messenger: blood,RNA: blood,Reproducibility of Results,Sensitivity and Specificity},
- month = jan,
- pages = {2},
- pmid = {19123946},
- title = {Gene expression profiling of whole blood: comparison of target preparation methods for accurate and reproducible microarray analysis},
- url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2649161/},
- volume = {10},
- year = {2009}
- }
- @article{Stratmann2011,
- abstract = {Interactions of transcription factors with chromatin are highly dynamic. Now Voss et al. (2011) demonstrate that two transcription factors with identical DNA-binding specificities do not compete for occupancy at a given DNA element, but instead, one factor can even facilitate the binding of another. This assisted loading probably involves chromatin-remodeling machines.},
- author = {Stratmann, Markus and Schibler, Ueli},
- doi = {10.1016/j.cell.2011.07.037},
- file = {:Users/ryan/Documents/Mendeley Desktop/Stratmann, Schibler - 2011 - Transcription factor loading please take my place!.pdf:pdf},
- issn = {1097-4172},
- journal = {Cell},
- month = aug,
- number = {4},
- pages = {497--499},
- pmid = {21854974},
- publisher = {Elsevier Inc.},
- title = {Transcription factor loading: please take my place!},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/21854974},
- volume = {146},
- year = {2011}
- }
- @article{Landau2013,
- abstract = {A central goal of RNA sequencing (RNA-seq) experiments is to detect differentially expressed genes. In the ubiquitous negative binomial model for RNA-seq data, each gene is given a dispersion parameter, and correctly estimating these dispersion parameters is vital to detecting differential expression. Since the dispersions control the variances of the gene counts, underestimation may lead to false discovery, while overestimation may lower the rate of true detection. After briefly reviewing several popular dispersion estimation methods, this article describes a simulation study that compares them in terms of point estimation and the effect on the performance of tests for differential expression. The methods that maximize the test performance are the ones that use a moderate degree of dispersion shrinkage: the DSS, Tagwise wqCML, and Tagwise APL. In practical RNA-seq data analysis, we recommend using one of these moderate-shrinkage methods with the QLShrink test in QuasiSeq R package.},
- author = {Landau, William Michael and Liu, Peng},
- doi = {10.1371/journal.pone.0081415},
- file = {:Users/ryan/Documents/Mendeley Desktop/Landau, Liu - 2013 - Dispersion estimation and its effect on test performance in RNA-seq data analysis a simulation-based comparison of.pdf:pdf},
- issn = {1932-6203},
- journal = {PLoS ONE},
- keywords = {Gene Expression Profiling,Sequence Analysis, RNA,Sequence Analysis, RNA: methods},
- number = {12},
- pages = {e81415},
- pmid = {24349066},
- title = {Dispersion estimation and its effect on test performance in {RNA-seq} data analysis: a simulation-based comparison of methods},
- url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3857202/},
- volume = {8},
- year = {2013}
- }
- @article{Dobbin2005,
- abstract = {Determining sample sizes for microarray experiments is important but the complexity of these experiments, and the large amounts of data they produce, can make the sample size issue seem daunting, and tempt researchers to use rules of thumb in place of formal calculations based on the goals of the experiment. Here we present formulae for determining sample sizes to achieve a variety of experimental goals, including class comparison and the development of prognostic markers. Results are derived which describe the impact of pooling, technical replicates and dye-swap arrays on sample size requirements. These results are shown to depend on the relative sizes of different sources of variability. A variety of common types of experimental situations and designs used with single-label and dual-label microarrays are considered. We discuss procedures for controlling the false discovery rate. Our calculations are based on relatively simple yet realistic statistical models for the data, and provide straightforward sample size calculation formulae.},
- author = {Dobbin, Kevin and Simon, Richard},
- doi = {10.1093/biostatistics/kxh015},
- file = {:Users/ryan/Documents/Mendeley Desktop/Dobbin, Simon - 2005 - Sample size determination in microarray experiments for class comparison and prognostic classification.pdf:pdf},
- issn = {1465-4644},
- journal = {Biostatistics},
- keywords = {Biological Markers,False Positive Reactions,Humans,Linear Models,Oligonucleotide Array Sequence Analysis,Oligonucleotide Array Sequence Analysis: methods,Predictive Value of Tests,Prognosis,Sample Size},
- month = jan,
- number = {1},
- pages = {27--38},
- pmid = {15618525},
- title = {Sample size determination in microarray experiments for class comparison and prognostic classification},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/15618525},
- volume = {6},
- year = {2005}
- }
- @misc{TheMendeleySupportTeam2011,
- abstract = {A quick introduction to Mendeley. Learn how Mendeley creates your personal digital library, how to organize and annotate documents, how to collaborate and share with colleagues, and how to generate citations and bibliographies.},
- address = {London},
- author = {{The Mendeley Support Team}},
- booktitle = {Mendeley Desktop},
- file = {:Users/ryan/Documents/Mendeley Desktop/The Mendeley Support Team - 2011 - Getting Started with Mendeley.pdf:pdf},
- keywords = {Mendeley,how-to,user manual},
- pages = {1--16},
- publisher = {Mendeley Ltd.},
- title = {Getting Started with {Mendeley}},
- url = {http://www.mendeley.com},
- year = {2011}
- }
- @article{Semedo2010,
- abstract = {One of the early phases that lead to fibrosis progression is inflammation. Once this stage is resolved, fibrosis might be prevented. Bone marrow mononuclear cells (BMMCs) are emerging as a new therapy for several pathologies, including autoimmune diseases, because they enact immunosuppression. In this study we aimed to evaluate the role of BMMC administration in a model of kidney fibrosis induced by an acute injury. C57Bl6 mice were subjected to unilateral severe ischemia by clamping the left renal pedicle for 1h. BMMCs were isolated from femurs and tibia, and after 6h of reperfusion, 1 x 10(6) cells were administrated intraperitoneally. At 24h after surgery, treated animals showed a significant decrease in creatinine and urea levels when compared with untreated animals. Different administration routes were tested. Moreover, interferon (IFN) receptor knockout BMMCs were used, as this receptor is necessary for BMMC activation. Labeled BMMCs were found in ischemic kidney on FACS analysis. This improved outcome was associated with modulation of inflammation in the kidney and systemic modulation, as determined by cytokine expression profiling. Despite non-amelioration of functional parameters, kidney mRNA expression of interleukin (IL)-6 at 6 weeks was lower in BMMC-treated animals, as were levels of collagen 1, connective tissue growth factor (CTGF), transforming growth factor-beta (TGF-beta) and vimentin. Protective molecules, such as IL-10, heme oxygenase 1 (HO-1) and bone morphogenetic 7 (BMP-7), were increased in treated animals after 6 weeks. Moreover, Masson and Picrosirius red staining analyses showed less fibrotic areas in the kidneys of treated animals. Thus, early modulation of inflammation by BMMCs after an ischemic injury leads to reduced fibrosis through modulation of early inflammation.},
- author = {Semedo, Patricia and Donizetti-Oliveira, Cassiano and Burgos-Silva, Marina and Cenedeze, Marco Antonio and {Avancini Costa Malheiros}, Denise Maria and Pacheco-Silva, Alvaro and C{\^{a}}mara, Niels Olsen Saraiva},
- doi = {10.1038/labinvest.2010.45},
- file = {:Users/ryan/Documents/Mendeley Desktop/Semedo et al. - 2010 - Bone marrow mononuclear cells attenuate fibrosis development after severe acute kidney injury.pdf:pdf},
- issn = {1530-0307},
- journal = {Laboratory Investigation},
- keywords = {Acute Disease,Animals,Antigens, CD34,Antigens, CD34: analysis,Bone Marrow Cells,Bone Marrow Cells: cytology,Bone Marrow Cells: metabolism,Bone Morphogenetic Protein 7,Bone Morphogenetic Protein 7: genetics,Bone Morphogenetic Protein 7: metabolism,Cell Transplantation,Cell Transplantation: methods,Cells, Cultured,Cytokines,Cytokines: genetics,Cytokines: metabolism,Female,Fibrosis,Fibrosis: surgery,Gene Expression,Heme Oxygenase-1,Heme Oxygenase-1: genetics,Heme Oxygenase-1: metabolism,Immunohistochemistry,Immunophenotyping,Ischemia,Ischemia: complications,Kidney,Kidney Diseases,Kidney Diseases: etiology,Kidney Diseases: surgery,Kidney: blood supply,Kidney: pathology,Leukocytes, Mononuclear,Leukocytes, Mononuclear: cytology,Leukocytes, Mononuclear: metabolism,Leukocytes, Mononuclear: transplantation,Male,Mice,Mice, Inbred C57BL,Proto-Oncogene Proteins c-kit,Proto-Oncogene Proteins c-kit: analysis,Reverse Transcriptase Polymerase Chain Reaction,acute-kidney-injury,cyno-project},
- mendeley-tags = {acute-kidney-injury,cyno-project},
- month = may,
- number = {5},
- pages = {685--695},
- pmid = {20308984},
- title = {Bone marrow mononuclear cells attenuate fibrosis development after severe acute kidney injury},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/20308984},
- volume = {90},
- year = {2010}
- }
- @article{Gillespie2010,
- abstract = {Large scale microarray experiments are becoming increasingly routine, particularly those which track a number of different cell lines through time. This time-course information provides valuable insight into the dynamic mechanisms underlying the biological processes being observed. However, proper statistical analysis of time-course data requires the use of more sophisticated tools and complex statistical models.},
- author = {Gillespie, Colin S and Lei, Guiyuan and Boys, Richard J and Greenall, Amanda and Wilkinson, Darren J},
- doi = {10.1186/1756-0500-3-81},
- file = {:Users/ryan/Documents/Mendeley Desktop/Gillespie et al. - 2010 - Analysing time course microarray data using Bioconductor a case study using yeast2 Affymetrix arrays.pdf:pdf},
- issn = {1756-0500},
- journal = {BMC Research Notes},
- month = jan,
- pages = {81},
- pmid = {20302631},
- title = {Analysing time course microarray data using {Bioconductor}: a case study using yeast2 {Affymetrix} arrays},
- url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2880961/},
- volume = {3},
- year = {2010}
- }
- @article{Meyer2011,
- abstract = {Progression through mitosis requires the sequential ubiquitination of cell cycle regulators by the anaphase-promoting complex, resulting in their proteasomal degradation. Although several mechanisms contribute to APC/C regulation during mitosis, the APC/C is able to discriminate between its many substrates by exploiting differences in the processivity of ubiquitin chain assembly. Here, we discuss how the APC/C achieves processive ubiquitin chain formation to trigger the sequential degradation of cell cycle regulators during mitosis.},
- author = {Meyer, Hermann-Josef and Rape, Michael},
- doi = {10.1016/j.semcdb.2011.03.009},
- file = {:Users/ryan/Documents/Mendeley Desktop/Meyer, Rape - 2011 - Processive ubiquitin chain formation by the anaphase-promoting complex.pdf:pdf},
- issn = {1096-3634},
- journal = {Seminars in Cell {\&} Developmental Biology},
- keywords = {Animals,Cell Nucleus,Cell Nucleus: genetics,Cell Nucleus: metabolism,Gene Expression Regulation, Developmental,Humans,Mammals,Mitosis,Proteasome Endopeptidase Complex,Proteasome Endopeptidase Complex: genetics,Proteasome Endopeptidase Complex: metabolism,Protein Binding,Protein Binding: genetics,Proteolysis,Substrate Specificity,Ubiquitin,Ubiquitin-Conjugating Enzymes,Ubiquitin-Conjugating Enzymes: genetics,Ubiquitin-Conjugating Enzymes: metabolism,Ubiquitin-Protein Ligase Complexes,Ubiquitin-Protein Ligase Complexes: genetics,Ubiquitin-Protein Ligase Complexes: metabolism,Ubiquitin: genetics,Ubiquitin: metabolism,Ubiquitination},
- month = aug,
- number = {6},
- pages = {544--550},
- pmid = {21477659},
- publisher = {Elsevier Ltd},
- title = {Processive ubiquitin chain formation by the anaphase-promoting complex},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/21477659},
- volume = {22},
- year = {2011}
- }
- @inproceedings{Kleinberg2002,
- abstract = {Although the study of clustering is centered around an intuitively compelling goal, it has been very difficult to develop a unified framework for reasoning about it at a technical level, and profoundly diverse approaches to clustering abound in the research community. Here we suggest a formal perspective on the difficulty in finding such a unification, in the form of an impossibility theorem: for a set of three simple properties, we show that there is no clustering function satisfying all three. Relaxations of these properties expose some of the interesting (and unavoidable) trade-offs at work in well-studied clustering techniques such as single-linkage, sum-of-pairs, k-means, and k-median.},
- address = {Cambridge, MA, USA},
- author = {Kleinberg, Jon},
- booktitle = {Proceedings of the 15th International Conference on Neural Information Processing Systems},
- file = {:Users/ryan/Documents/Mendeley Desktop/Kleinberg - 2002 - An Impossibility Theorem for Clustering.pdf:pdf},
- month = jul,
- pages = {463--470},
- publisher = {MIT Press},
- title = {An Impossibility Theorem for Clustering},
- url = {http://dl.acm.org/citation.cfm?id=2968618.2968676},
- year = {2002}
- }
- @article{Pinheiro2012,
- abstract = {Genetic information storage and processing rely on just two polymers, DNA and RNA, yet whether their role reflects evolutionary history or fundamental functional constraints is currently unknown. With the use of polymerase evolution and design, we show that genetic information can be stored in and recovered from six alternative genetic polymers based on simple nucleic acid architectures not found in nature [xeno-nucleic acids (XNAs)]. We also select XNA aptamers, which bind their targets with high affinity and specificity, demonstrating that beyond heredity, specific XNAs have the capacity for Darwinian evolution and folding into defined structures. Thus, heredity and evolution, two hallmarks of life, are not limited to DNA and RNA but are likely to be emergent properties of polymers capable of information storage.},
- author = {Pinheiro, Vitor B and Taylor, Alexander I and Cozens, Christopher and Abramov, Mikhail and Renders, Marleen and Zhang, Su and Chaput, John C and Wengel, Jesper and Peak-Chew, Sew-Yeu and McLaughlin, Stephen H and Herdewijn, Piet and Holliger, Philipp},
- doi = {10.1126/science.1217622},
- file = {:Users/ryan/Documents/Mendeley Desktop/Pinheiro et al. - 2012 - Synthetic genetic polymers capable of heredity and evolution.pdf:pdf},
- issn = {1095-9203},
- journal = {Science},
- keywords = {Aptamers, Nucleotide,Aptamers, Nucleotide: chemistry,Aptamers, Nucleotide: genetics,Aptamers, Nucleotide: metabolism,DNA,DNA-Directed DNA Polymerase,DNA-Directed DNA Polymerase: chemistry,DNA-Directed DNA Polymerase: genetics,DNA-Directed DNA Polymerase: metabolism,DNA: chemistry,DNA: genetics,Directed Molecular Evolution,Evolution, Molecular,Molecular Mimicry,Nucleic Acids,Nucleic Acids: chemistry,Nucleic Acids: genetics,Nucleic Acids: metabolism,Polymers,Polymers: chemistry,Polymers: metabolism,RNA,RNA-Directed DNA Polymerase,RNA-Directed DNA Polymerase: chemistry,RNA-Directed DNA Polymerase: metabolism,RNA: chemistry,RNA: genetics,Reverse Transcription,Templates, Genetic,Transcription, Genetic},
- month = apr,
- number = {6079},
- pages = {341--344},
- pmid = {22517858},
- title = {Synthetic genetic polymers capable of heredity and evolution},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/22517858},
- volume = {336},
- year = {2012}
- }
- @article{Lewin2007,
- author = {Lewin, Alex and Bochkina, Natalia and Richardson, Sylvia},
- doi = {10.2202/1544-6115.1314},
- file = {:Users/ryan/Documents/Mendeley Desktop/Lewin, Bochkina, Richardson - 2007 - Fully Bayesian Mixture Model for Differential Gene Expression Simulations and Model Checks.pdf:pdf},
- issn = {1544-6115},
- journal = {Statistical Applications in Genetics and Molecular Biology},
- month = jan,
- number = {1},
- title = {Fully {Bayesian} Mixture Model for Differential Gene Expression: Simulations and Model Checks},
- url = {http://www.degruyter.com/view/j/sagmb.2007.6.1/sagmb.2007.6.1.1314/sagmb.2007.6.1.1314.xml},
- volume = {6},
- year = {2007}
- }
- @article{Luo2013,
- author = {Luo, Weijun and Brouwer, Cory},
- doi = {10.1093/bioinformatics/btt285},
- file = {:Users/ryan/Documents/Mendeley Desktop/Luo, Brouwer - 2013 - Pathview an RBioconductor package for pathway based data integration and visualization.pdf:pdf},
- issn = {1367-4803},
- journal = {Bioinformatics},
- month = jul,
- number = {14},
- pages = {1830--1831},
- title = {{Pathview}: an {R/Bioconductor} package for pathway based data integration and visualization},
- url = {http://bioinformatics.oxfordjournals.org/cgi/doi/10.1093/bioinformatics/btt285},
- volume = {29},
- year = {2013}
- }
- @article{Robinson2010a,
- abstract = {SUMMARY: It is expected that emerging digital gene expression (DGE) technologies will overtake microarray technologies in the near future for many functional genomics applications. One of the fundamental data analysis tasks, especially for gene expression studies, involves determining whether there is evidence that counts for a transcript or exon are significantly different across experimental conditions. edgeR is a Bioconductor software package for examining differential expression of replicated count data. An overdispersed Poisson model is used to account for both biological and technical variability. Empirical Bayes methods are used to moderate the degree of overdispersion across transcripts, improving the reliability of inference. The methodology can be used even with the most minimal levels of replication, provided at least one phenotype or experimental condition is replicated. The software may have other applications beyond sequencing data, such as proteome peptide count data.
AVAILABILITY: The package is freely available under the LGPL licence from the Bioconductor web site (http://bioconductor.org).},
- author = {Robinson, Mark D and McCarthy, Davis J and Smyth, Gordon K},
- doi = {10.1093/bioinformatics/btp616},
- file = {:Users/ryan/Documents/Mendeley Desktop/Robinson, McCarthy, Smyth - 2010 - edgeR a Bioconductor package for differential expression analysis of digital gene expression data.pdf:pdf},
- issn = {1367-4811},
- journal = {Bioinformatics},
- keywords = {Algorithms,Gene Expression Profiling,Gene Expression Profiling: methods,Oligonucleotide Array Sequence Analysis,Oligonucleotide Array Sequence Analysis: methods,Programming Languages,Signal Processing, Computer-Assisted,Software},
- month = jan,
- number = {1},
- pages = {139--140},
- pmid = {19910308},
- title = {{edgeR}: a {Bioconductor} package for differential expression analysis of digital gene expression data},
- url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2796818/},
- volume = {26},
- year = {2010}
- }
- @article{Hull2013,
- abstract = {BACKGROUND: The oxidative burst is one of the major antimicrobial mechanisms adopted by macrophages. The WKY rat strain is uniquely susceptible to experimentally induced macrophage-dependent crescentic glomerulonephritis (Crgn). We previously identified the AP-1 transcription factor JunD as a determinant of macrophage activation in WKY bone marrow-derived macrophages (BMDMs). JunD is over-expressed in WKY BMDMs and its silencing reduces Fc receptor-mediated oxidative burst in these cells. RESULTS: Here we combined Jund RNA interference with microarray analyses alongside ChIP-sequencing (ChIP-Seq) analyses in WKY BMDMs to investigate JunD-mediated control of macrophage activation in basal and lipopolysaccharide (LPS) stimulated cells. Microarray analysis following Jund silencing showed that Jund activates and represses gene expression with marked differential expression ({\textgreater}3 fold) for genes linked with oxidative stress and IL-1$\beta$ expression. These results were complemented by comparing whole genome expression in WKY BMDMs with Jund congenic strain (WKY.LCrgn2) BMDMs which express lower levels of JunD. ChIP-Seq analyses demonstrated that the increased expression of JunD resulted in an increased number of binding events in WKY BMDMs compared to WKY.LCrgn2 BMDMs. Combined ChIP-Seq and microarray analysis revealed a set of primary JunD-targets through which JunD exerts its effect on oxidative stress and IL-1$\beta$ synthesis in basal and LPS-stimulated macrophages. CONCLUSIONS: These findings demonstrate how genetically determined levels of a transcription factor affect its binding sites in primary cells and identify JunD as a key regulator of oxidative stress and IL-1$\beta$ synthesis in primary macrophages, which may play a role in susceptibility to Crgn.},
- author = {Hull, Richard P. and Srivastava, Prashant K. and D'Souza, Zelpha and Atanur, Santosh S. and Mechta-Grigoriou, Fatima and Game, Laurence and Petretto, Enrico and Cook, H. Terence and Aitman, Timothy J. and Behmoaras, Jacques},
- doi = {10.1186/1471-2164-14-92},
- file = {:Users/ryan/Documents/Mendeley Desktop/Hull et al. - 2013 - Combined ChIP-Seq and transcriptome analysis identifies AP-1JunD as a primary regulator of oxidative stress and IL.pdf:pdf},
- issn = {1471-2164},
- journal = {BMC Genomics},
- number = {1},
- pages = {92},
- pmid = {23398888},
- title = {Combined {ChIP-Seq} and transcriptome analysis identifies {AP-1/JunD} as a primary regulator of oxidative stress and {IL-1$\beta$} synthesis in macrophages},
- url = {https://bmcgenomics.biomedcentral.com/articles/10.1186/1471-2164-14-92},
- volume = {14},
- year = {2013}
- }
- @article{Majumdar2003,
- abstract = {We have characterized adhesion molecules on the surface of multipotential human mesenchymal stem cells (hMSCs) and identified molecules whose ligands are present on mature hematopoietic cells. Flow cytometric analysis of hMSCs identified the expression of integrins: alpha1, alpha2, alpha3, alpha5, alpha6, alphav, beta1, beta3, and beta4, in addition to ICAM-1, ICAM-2, VCAM-1, CD72, and LFA-3. Exposure of hMSCs to IL-1alpha, TNFalpha or IFNgamma up-modulated ICAM-1 surface expression, whereas only IFNgamma increased both HLA-class I and -class II molecules on the cell surface. Whole cell-binding assays between the hMSCs and hematopoietic cell lines showed that T lymphocytic lines bound hMSCs with higher affinity than lines of either B lymphocytes or those of myeloid lineage. Experiments using autologous T lymphocytes isolated from peripheral blood mononuclear cells showed that hMSCs exhibited increased affinity for activated T-lymphocytes compared to resting T cells by quantitative whole cell binding and rosetting assays. Flow cytometric analysis of rosetted cells demonstrated that both CD4+ and CD8+ cells bound to hMSCs. To determine the functional significance of these findings, we tested the ability of hMSCs to present antigen to T lymphocytes. hMSCs pulsed with tetanus toxoid stimulated proliferation and cytokine production (IL-4, IL-10, and IFNgamma) in a tetanus-toxoid-specific T cell line. Maximal cytokine production correlated with maximal antigen-dependent proliferation. These data demonstrate physiological outcome as a consequence of interactions between hMSCs and human hematopoietic lineage cells, suggesting a role for hMSCs in vivo to influence both hematopoietic and immune function(s).},
- author = {Majumdar, Manas K. and Keane-Moore, Michele and Buyaner, Diana and Hardy, Wayne B. and Moorman, Mark A. and McIntosh, Kevin R. and Mosca, Joseph D.},
- doi = {10.1159/000068710},
- file = {:Users/ryan/Documents/Mendeley Desktop/Majumdar et al. - 2003 - Characterization and Functionality of Cell Surface Molecules on Human Mesenchymal Stem Cells.pdf:pdf},
- issn = {1423-0127},
- journal = {Journal of Biomedical Science},
- keywords = {Antigen presentation,Hematopoietic interactions,Immune function,Mesenchymal stem cells,T lymphocytes},
- number = {2},
- pages = {228--241},
- title = {Characterization and Functionality of Cell Surface Molecules on Human Mesenchymal Stem Cells},
- url = {http://www.karger.com/doi/10.1159/000068710},
- volume = {10},
- year = {2003}
- }
- @article{Sciuto2018,
- abstract = {Coimmunoprecipitation (co-IP) is one of the most frequently used techniques to study protein-protein (PPIs) or protein-nucleic acid interactions (PNIs). However, the presence of coprecipitated contaminants is a well-recognized issue associated with single-step co-IPs. To overcome this limitation, we developed the two-step co-IP (TIP) strategy that enables sequential coimmunoprecipitations of endogenous protein complexes. TIP can be performed with a broad range of mono- and polyclonal antibodies targeting a single protein or different components of a given complex. TIP results in a highly selective enrichment of protein complexes and thus outperforms single-step co-IPs for downstream applications such as mass spectrometry for the identification of PPIs and quantitative PCR for the analysis of PNIs. We benchmarked TIP for the identification of CD95/FAS-interacting proteins in primary human CD4+ T cells, which recapitulated all major known interactors, but also enabled the proteomics discovery of PPM1G and IPO7 as new interaction partners. For its feasibility and high performance, we propose TIP as an advanced tool for the isolation of highly purified protein-protein and protein-nucleic acid complexes under native expression conditions.},
- author = {Sciuto, Maria Rita and Warnken, Uwe and Schn{\"{o}}lzer, Martina and Valvo, Cecilia and Brunetto, Lidia and Boe, Alessandra and Biffoni, Mauro and Krammer, Peter H. and {De Maria}, Ruggero and Haas, Tobias L.},
- doi = {10.1074/mcp.O116.065920},
- file = {:Users/ryan/Documents/Mendeley Desktop/Sciuto et al. - 2018 - Two-Step Coimmunoprecipitation (TIP) enables efficient and highly selective isolation of native protein complexes.pdf:pdf},
- issn = {1535-9484},
- journal = {Molecular {\&} Cellular Proteomics},
- number = {5},
- pages = {993--1009},
- title = {{Two-Step Coimmunoprecipitation (TIP) enables efficient and highly selective isolation of native protein complexes}},
- url = {https://www.mcponline.org/content/mcprot/17/5/993.full.pdf},
- volume = {17},
- year = {2018}
- }
- @misc{Faraway2002,
- author = {Faraway, Julian J.},
- file = {:Users/ryan/Documents/Mendeley Desktop/Faraway - 2002 - Practical Regression and ANOVA using R.pdf:pdf},
- month = jul,
- title = {{Practical Regression and ANOVA using R}},
- url = {http://csyue.nccu.edu.tw/Practical{\%}20Regression{\%}20and{\%}20Anova{\%}20using{\%}20R.pdf},
- year = {2002}
- }
- @article{Froussios2016,
- author = {Froussios, Kimon and Schurch, Nicholas J. and Mackinnon, Katarzyna and Gierlinski, Marek and Duc, C{\'{e}}line and Simpson, Gordon G. and Barton, Geoffrey J.},
- doi = {10.1101/090753},
- file = {:Users/ryan/Documents/Mendeley Desktop/Froussios et al. - 2016 - How well do RNA-Seq differential gene expression tools perform in higher eukaryotes.pdf:pdf},
- journal = {bioRxiv},
- pages = {1--19},
- title = {{How well do RNA-Seq differential gene expression tools perform in higher eukaryotes?}},
- url = {http://biorxiv.org/content/biorxiv/early/2016/12/02/090753.full.pdf},
- year = {2016}
- }
- @article{Urbut2016,
- author = {Urbut, Sarah Margaret and Wang, Gao and Stephens, Matthew},
- doi = {10.1101/096552},
- file = {:Users/ryan/Documents/Mendeley Desktop/Urbut, Wang, Stephens - 2016 - Flexible statistical methods for estimating and testing effects in genomic studies with multiple conditio.pdf:pdf},
- journal = {bioRxiv},
- title = {{Flexible statistical methods for estimating and testing effects in genomic studies with multiple conditions}},
- url = {http://www.biorxiv.org/content/biorxiv/early/2017/05/09/096552.full.pdf},
- year = {2016}
- }
- @article{Zhou2011,
- abstract = {A number of penalization and shrinkage approaches have been proposed for the analysis of microarray gene expression data. Similar techniques are now routinely applied to RNA sequence transcriptional count data, although the value of such shrinkage has not been conclusively established. If penalization is desired, the explicit modeling of mean-variance relationships provides a flexible testing regimen that 'borrows' information across genes, while easily incorporating design effects and additional covariates.},
- author = {Zhou, Yi-Hui and Xia, Kai and Wright, Fred A.},
- doi = {10.1093/bioinformatics/btr449},
- file = {:Users/ryan/Documents/Mendeley Desktop/Zhou, Xia, Wright - 2011 - A powerful and flexible approach to the analysis of RNA sequence count data.pdf:pdf},
- issn = {1367-4811},
- journal = {Bioinformatics},
- keywords = {Base Sequence,Base Sequence: genetics,Gene Expression,Gene Expression Profiling,Gene Expression Profiling: methods,Models, Genetic,Models, Statistical,Sequence Analysis, RNA,Sequence Analysis, RNA: methods,Software,Transcription, Genetic,Transcriptome,Transcriptome: genetics},
- month = oct,
- number = {19},
- pages = {2672--2678},
- pmid = {21810900},
- title = {{A powerful and flexible approach to the analysis of RNA sequence count data}},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/21810900},
- volume = {27},
- year = {2011}
- }
- @article{Wilbanks2010,
- abstract = {Next-generation DNA sequencing coupled with chromatin immunoprecipitation (ChIP-seq) is revolutionizing our ability to interrogate whole genome protein-DNA interactions. Identification of protein binding sites from ChIP-seq data has required novel computational tools, distinct from those used for the analysis of ChIP-Chip experiments. The growing popularity of ChIP-seq spurred the development of many different analytical programs (at last count, we noted 31 open source methods), each with some purported advantage. Given that the literature is dense and empirical benchmarking challenging, selecting an appropriate method for ChIP-seq analysis has become a daunting task. Herein we compare the performance of eleven different peak calling programs on common empirical, transcription factor datasets and measure their sensitivity, accuracy and usability. Our analysis provides an unbiased critical assessment of available technologies, and should assist researchers in choosing a suitable tool for handling ChIP-seq data.},
- author = {Wilbanks, Elizabeth G. and Facciotti, Marc T.},
- doi = {10.1371/journal.pone.0011471},
- file = {:Users/ryan/Documents/Mendeley Desktop/Wilbanks, Facciotti - 2010 - Evaluation of algorithm performance in ChIP-seq peak detection.pdf:pdf},
- issn = {1932-6203},
- journal = {PLoS ONE},
- keywords = {Algorithms,Chromatin Immunoprecipitation,Chromatin Immunoprecipitation: methods,Sequence Analysis, DNA,Sequence Analysis, DNA: methods},
- month = jul,
- number = {7},
- pages = {e11471},
- pmid = {20628599},
- title = {{Evaluation of algorithm performance in ChIP-seq peak detection}},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=2900203{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {5},
- year = {2010}
- }
- @misc{Chuen,
- author = {Tan, Chuen Seng},
- file = {:Users/ryan/Documents/Mendeley Desktop/Chuen, Tan - Unknown - Additional file 1 Correlating gene and protein expres- sion data using Correlated Factor Analysis.pdf:pdf},
- pages = {1--16},
- title = {{Additional file 1: Correlating gene and protein expression data using Correlated Factor Analysis}}
- }
- @manual{Wang2011,
- author = {Wang, Liguo},
- file = {:Users/ryan/Documents/Mendeley Desktop/Wang - 2011 - The EVER-seq manual.pdf:pdf},
- title = {{The EVER-seq manual}},
- year = {2011}
- }
- @misc{Trapnell,
- author = {Trapnell, Cole and Hendrickson, David G. and Sauvageau, Martin and Goff, Loyal and Rinn, John L. and Pachter, Lior},
- doi = {10.1038/nbt.2450},
- file = {:Users/ryan/Documents/Mendeley Desktop/Trapnell et al. - Unknown - Differential analysis of gene regulation by hoxa1 at isoform resolution with rna-seq (supplemental material).pdf:pdf},
- title = {{Differential analysis of gene regulation by HOXA1 at isoform resolution with RNA-seq (supplemental material)}}
- }
- @article{Zwiener2014,
- abstract = {Gene expression measurements have successfully been used for building prognostic signatures, i.e for identifying a short list of important genes that can predict patient outcome. Mostly microarray measurements have been considered, and there is little advice available for building multivariable risk prediction models from RNA-Seq data. We specifically consider penalized regression techniques, such as the lasso and componentwise boosting, which can simultaneously consider all measurements and provide both, multivariable regression models for prediction and automated variable selection. However, they might be affected by the typical skewness, mean-variance-dependency or extreme values of RNA-Seq covariates and therefore could benefit from transformations of the latter. In an analytical part, we highlight preferential selection of covariates with large variances, which is problematic due to the mean-variance dependency of RNA-Seq data. In a simulation study, we compare different transformations of RNA-Seq data for potentially improving detection of important genes. Specifically, we consider standardization, the log transformation, a variance-stabilizing transformation, the Box-Cox transformation, and rank-based transformations. In addition, the prediction performance for real data from patients with kidney cancer and acute myeloid leukemia is considered. We show that signature size, identification performance, and prediction performance critically depend on the choice of a suitable transformation. Rank-based transformations perform well in all scenarios and can even outperform complex variance-stabilizing approaches. Generally, the results illustrate that the distribution and potential transformations of RNA-Seq data need to be considered as a critical step when building risk prediction models by penalized regression techniques.},
- author = {Zwiener, Isabella and Frisch, Barbara and Binder, Harald},
- doi = {10.1371/journal.pone.0085150},
- file = {:Users/ryan/Documents/Mendeley Desktop/Zwiener, Frisch, Binder - 2014 - Transforming RNA-Seq Data to Improve the Performance of Prognostic Gene Signatures.pdf:pdf},
- issn = {1932-6203},
- journal = {PLoS ONE},
- month = jan,
- number = {1},
- pages = {e85150},
- pmid = {24416353},
- title = {{Transforming RNA-Seq Data to Improve the Performance of Prognostic Gene Signatures}},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=3885686{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {9},
- year = {2014}
- }
- @article{Volkmar2012,
- abstract = {In addition to genetic predisposition, environmental and lifestyle factors contribute to the pathogenesis of type 2 diabetes (T2D). Epigenetic changes may provide the link for translating environmental exposures into pathological mechanisms. In this study, we performed the first comprehensive DNA methylation profiling in pancreatic islets from T2D and non-diabetic donors. We uncovered 276 CpG loci affiliated to promoters of 254 genes displaying significant differential DNA methylation in diabetic islets. These methylation changes were not present in blood cells from T2D individuals nor were they experimentally induced in non-diabetic islets by exposure to high glucose. For a subgroup of the differentially methylated genes, concordant transcriptional changes were present. Functional annotation of the aberrantly methylated genes and RNAi experiments highlighted pathways implicated in beta-cell survival and function; some are implicated in cellular dysfunction while others facilitate adaptation to stressors. Together, our findings offer new insights into the intricate mechanisms of T2D pathogenesis, underscore the important involvement of epigenetic dysregulation in diabetic islets and may advance our understanding of T2D aetiology.},
- author = {Volkmar, Michael and Dedeurwaerder, Sarah and Cunha, Daniel A. and Ndlovu, Matladi N. and Defrance, Matthieu and Deplus, Rachel and Calonne, Emilie and Volkmar, Ute and Igoillo-Esteve, Mariana and Naamane, Najib and {Del Guerra}, Silvia and Masini, Matilde and Bugliani, Marco and Marchetti, Piero and Cnop, Miriam and Eizirik, Decio L. and Fuks, Fran{\c{c}}ois},
- doi = {10.1038/emboj.2011.503},
- file = {:Users/ryan/Documents/Mendeley Desktop/Volkmar et al. - 2012 - DNA methylation profiling identifies epigenetic dysregulation in pancreatic islets from type 2 diabetic patients.pdf:pdf},
- issn = {0261-4189},
- journal = {The EMBO Journal},
- keywords = {DNA methylation,pancreatic islets,type 2 diabetes},
- number = {6},
- pages = {1405--1426},
- title = {{DNA methylation profiling identifies epigenetic dysregulation in pancreatic islets from type 2 diabetic patients}},
- volume = {31},
- year = {2012}
- }
- @article{Tsai2010,
- abstract = {Long intergenic noncoding RNAs (lincRNAs) regulate chromatin states and epigenetic inheritance. Here, we show that the lincRNA HOTAIR serves as a scaffold for at least two distinct histone modification complexes. A 5' domain of HOTAIR binds polycomb repressive complex 2 (PRC2), whereas a 3' domain of HOTAIR binds the LSD1/CoREST/REST complex. The ability to tether two distinct complexes enables RNA-mediated assembly of PRC2 and LSD1 and coordinates targeting of PRC2 and LSD1 to chromatin for coupled histone H3 lysine 27 methylation and lysine 4 demethylation. Our results suggest that lincRNAs may serve as scaffolds by providing binding surfaces to assemble select histone modification enzymes, thereby specifying the pattern of histone modifications on target genes.},
- author = {Tsai, Miao-Chih and Manor, Ohad and Wan, Yue and Mosammaparast, Nima and Wang, Jordon K. and Lan, Fei and Shi, Yang and Segal, Eran and Chang, Howard Y.},
- doi = {10.1126/science.1192002},
- file = {:Users/ryan/Documents/Mendeley Desktop/Tsai et al. - 2010 - Long Noncoding RNA as Modular Scaffold of Histone Modification Complexes.pdf:pdf},
- issn = {0036-8075},
- journal = {Science},
- month = aug,
- number = {5992},
- pages = {689--693},
- pmid = {20616235},
- title = {{Long Noncoding RNA as Modular Scaffold of Histone Modification Complexes}},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/20616235},
- volume = {329},
- year = {2010}
- }
- @article{Smith2012,
- abstract = {Fifty per cent of the genome is discontinuously replicated on the lagging strand as Okazaki fragments. Eukaryotic Okazaki fragments remain poorly characterized and, because nucleosomes are rapidly deposited on nascent DNA, Okazaki fragment processing and nucleosome assembly potentially affect one another. Here we show that ligation-competent Okazaki fragments in Saccharomyces cerevisiae are sized according to the nucleosome repeat. Using deep sequencing, we demonstrate that ligation junctions preferentially occur near nucleosome midpoints rather than in internucleosomal linker regions. Disrupting chromatin assembly or lagging-strand polymerase processivity affects both the size and the distribution of Okazaki fragments, suggesting a role for nascent chromatin, assembled immediately after the passage of the replication fork, in the termination of Okazaki fragment synthesis. Our studies represent the first high-resolution analysis--to our knowledge--of eukaryotic Okazaki fragments in vivo, and reveal the interconnection between lagging-strand synthesis and chromatin assembly.},
- author = {Smith, Duncan J. and Whitehouse, Iestyn},
- doi = {10.1038/nature10895},
- file = {:Users/ryan/Documents/Mendeley Desktop/Smith, Whitehouse - 2012 - Intrinsic coupling of lagging-strand synthesis to chromatin assembly.pdf:pdf},
- issn = {1476-4687},
- journal = {Nature},
- keywords = {Chromatin Assembly and Disassembly,Chromatin Assembly and Disassembly: physiology,DNA,DNA Ligases,DNA Ligases: deficiency,DNA Ligases: metabolism,DNA Polymerase III,DNA Polymerase III: metabolism,DNA Replication,DNA-Binding Proteins,DNA-Binding Proteins: metabolism,DNA: biosynthesis,DNA: genetics,DNA: metabolism,High-Throughput Nucleotide Sequencing,Nucleosomes,Nucleosomes: genetics,Nucleosomes: metabolism,Protein Binding,Saccharomyces cerevisiae,Saccharomyces cerevisiae Proteins,Saccharomyces cerevisiae Proteins: metabolism,Saccharomyces cerevisiae: enzymology,Saccharomyces cerevisiae: genetics,Transcription Factors,Transcription Factors: metabolism},
- month = mar,
- number = {7390},
- pages = {434--438},
- pmid = {22419157},
- publisher = {Nature Publishing Group},
- title = {{Intrinsic coupling of lagging-strand synthesis to chromatin assembly}},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/22419157},
- volume = {483},
- year = {2012}
- }
- @article{Wu2010b,
- abstract = {Next-generation sequencing captures sequence differences in reads relative to a reference genome or transcriptome, including splicing events and complex variants involving multiple mismatches and long indels. We present computational methods for fast detection of complex variants and splicing in short reads, based on a successively constrained search process of merging and filtering position lists from a genomic index. Our methods are implemented in GSNAP (Genomic Short-read Nucleotide Alignment Program), which can align both single- and paired-end reads as short as 14 nt and of arbitrarily long length. It can detect short- and long-distance splicing, including interchromosomal splicing, in individual reads, using probabilistic models or a database of known splice sites. Our program also permits SNP-tolerant alignment to a reference space of all possible combinations of major and minor alleles, and can align reads from bisulfite-treated DNA for the study of methylation state.},
- author = {Wu, Thomas D. and Nacu, Serban},
- doi = {10.1093/bioinformatics/btq057},
- file = {:Users/ryan/Documents/Mendeley Desktop/Wu, Nacu - 2010 - Fast and SNP-tolerant detection of complex variants and splicing in short reads.pdf:pdf},
- issn = {1367-4811},
- journal = {Bioinformatics},
- keywords = {Base Sequence,DNA, Recombinant,Genetic Variation,Genomics,Genomics: methods,Polymorphism, Single Nucleotide,RNA Splicing},
- month = apr,
- number = {7},
- pages = {873--881},
- pmid = {20147302},
- title = {{Fast and SNP-tolerant detection of complex variants and splicing in short reads}},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=2844994{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {26},
- year = {2010}
- }
- @article{Harrow2012,
- abstract = {The GENCODE Consortium aims to identify all gene features in the human genome using a combination of computational analysis, manual annotation, and experimental validation. Since the first public release of this annotation data set, few new protein-coding loci have been added, yet the number of alternative splicing transcripts annotated has steadily increased. The GENCODE 7 release contains 20,687 protein-coding and 9640 long noncoding RNA loci and has 33,977 coding transcripts not represented in UCSC genes and RefSeq. It also has the most comprehensive annotation of long noncoding RNA (lncRNA) loci publicly available with the predominant transcript form consisting of two exons. We have examined the completeness of the transcript annotation and found that 35{\%} of transcriptional start sites are supported by CAGE clusters and 62{\%} of protein-coding genes have annotated polyA sites. Over one-third of GENCODE protein-coding genes are supported by peptide hits derived from mass spectrometry spectra submitted to Peptide Atlas. New models derived from the Illumina Body Map 2.0 RNA-seq data identify 3689 new loci not currently in GENCODE, of which 3127 consist of two exon models indicating that they are possibly unannotated long noncoding loci. GENCODE 7 is publicly available from gencodegenes.org and via the Ensembl and UCSC Genome Browsers.},
- author = {Harrow, Jennifer and Frankish, Adam and Gonzalez, Jose M. and Tapanari, Electra and Diekhans, Mark and Kokocinski, Felix and Aken, Bronwen L. and Barrell, Daniel and Zadissa, Amonida and Searle, Stephen and Barnes, If and Bignell, Alexandra and Boychenko, Veronika and Hunt, Toby and Kay, Mike and Mukherjee, Gaurab and Rajan, Jeena and Despacio-Reyes, Gloria and Saunders, Gary and Steward, Charles and Harte, Rachel and Lin, Michael and Howald, C{\'{e}}dric and Tanzer, Andrea and Derrien, Thomas and Chrast, Jacqueline and Walters, Nathalie and Balasubramanian, Suganthi and Pei, Baikang and Tress, Michael and Rodriguez, Jose Manuel and Ezkurdia, Iakes and {Van Baren}, Jeltje and Brent, Michael and Haussler, David and Kellis, Manolis and Valencia, Alfonso and Reymond, Alexandre and Gerstein, Mark and Guig{\'{o}}, Roderic and Hubbard, Tim J.},
- doi = {10.1101/gr.135350.111},
- file = {:Users/ryan/Documents/Mendeley Desktop/Harrow et al. - 2012 - GENCODE The reference human genome annotation for the ENCODE project.pdf:pdf},
- issn = {1088-9051},
- journal = {Genome Research},
- number = {9},
- pages = {1760--1774},
- title = {{GENCODE: The reference human genome annotation for the ENCODE project}},
- volume = {22},
- year = {2012}
- }
- @phdthesis{LaMere2015,
- abstract = {CD4 T cells undergo activation and differentiation into various cellular subtypes in response to antigen. Memory T cells are known to be primed for rapid responses, but the epigenetic influences upon the process of activation and the formation of memory cells are still poorly defined. Two major epigenetic mechanisms influencing the regulation of gene expression include CpG methylation and histone modifications. We developed a novel method to analyze CpG methylation in order to interrogate the CpG methylation status of 2100 gene promoters in na{\"{i}}ve and memory CD4 T cells. From these data, we demonstrate that CpG methylation profiling of a relatively small gene set can distinguish memory from na{\"{i}}ve subsets. Additionally, we identify a class of primed genes in memory cells that are putatively regulated by CpG methylation, many of which have not previously been studied in T cells. In addition to our CpG methylation studies, we have profiled 3 histone modifications in na{\"{i}}ve and memory CD4 T cells during activation. H3K4 and H3K27 methylation are frequently studied in the context of their association with gene expression, but their function during CD4 T cell activation has not been determined. Using ChIPseq for H3K4me2, H3K4me3 and H3K27me3 alongside RNAseq in na{\"{i}}ve and memory human CD4 T cells at rest and after activation, we have defined the roles these modifications are playing throughout the process of activation and linked them back to regulation of key pathways in T cell activation and differentiation. Our results demonstrate that promoter H3K4 methylation provides a feed-forward mechanism for upregulating RNA expression during activation, while changes to promoter H3K27me3 after activation reinforce baseline expression at rest. 
H3K27me3 demethylation is a prominent finding in both na{\"{i}}ve and memory cells early in activation, and inhibiting this demethylation leads to proliferation defects, blunted CD25 upregulation, cytokine perturbations, and cell cycle disruptions in na{\"{i}}ve CD4 T cells, demonstrating that H3K27me3 demethylation is integral to CD4 T cell activation. Our results enhance our understanding of the role these epigenetic modifications play during CD4 T cell activation and underscore key differences between na{\"{i}}ve and memory cells in their activation dynamics.},
- author = {LaMere, Sarah Adrianne Hutchison},
- file = {:Users/ryan/Documents/Mendeley Desktop/LaMere - 2015 - Dynamic epigenetic regulation of CD4 T cell activation and memory formation.pdf:pdf},
- school = {The Scripps Research Institute},
- title = {{Dynamic epigenetic regulation of CD4 T cell activation and memory formation}},
- year = {2015}
- }
- @article{OHara2010,
- author = {O'Hara, Robert B. and Kotze, D. Johan},
- doi = {10.1111/j.2041-210X.2010.00021.x},
- file = {:Users/ryan/Documents/Mendeley Desktop/O'Hara, Kotze - 2010 - Do not log‐transform count data.pdf:pdf},
- issn = {2041-210X},
- journal = {Methods in Ecology and Evolution},
- month = mar,
- number = {2},
- pages = {118--122},
- title = {{Do not log-transform count data}},
- url = {http://doi.wiley.com/10.1111/j.2041-210X.2010.00021.x},
- volume = {1},
- year = {2010}
- }
- @article{Kadota2012,
- abstract = {BACKGROUND: High-throughput sequencing, such as ribonucleic acid sequencing (RNA-seq) and chromatin immunoprecipitation sequencing (ChIP-seq) analyses, enables various features of organisms to be compared through tag counts. Recent studies have demonstrated that the normalization step for RNA-seq data is critical for a more accurate subsequent analysis of differential gene expression. Development of a more robust normalization method is desirable for identifying the true difference in tag count data.
- RESULTS: We describe a strategy for normalizing tag count data, focusing on RNA-seq. The key concept is to remove data assigned as potential differentially expressed genes (DEGs) before calculating the normalization factor. Several R packages for identifying DEGs are currently available, and each package uses its own normalization method and gene ranking algorithm. We compared a total of eight package combinations: four R packages (edgeR, DESeq, baySeq, and NBPSeq) with their default normalization settings and with our normalization strategy. Many synthetic datasets under various scenarios were evaluated on the basis of the area under the curve (AUC) as a measure for both sensitivity and specificity. We found that packages using our strategy in the data normalization step overall performed well. This result was also observed for a real experimental dataset.
- CONCLUSION: Our results showed that the elimination of potential DEGs is essential for more accurate normalization of RNA-seq data. The concept of this normalization strategy can widely be applied to other types of tag count data and to microarray data.},
- author = {Kadota, Koji and Nishiyama, Tomoaki and Shimizu, Kentaro},
- doi = {10.1186/1748-7188-7-5},
- file = {:Users/ryan/Documents/Mendeley Desktop/Kadota, Nishiyama, Shimizu - 2012 - A normalization strategy for comparing tag count data.pdf:pdf},
- issn = {1748-7188},
- journal = {Algorithms for Molecular Biology},
- month = jan,
- number = {1},
- pages = {5},
- pmid = {22475125},
- publisher = {BioMed Central Ltd},
- title = {{A normalization strategy for comparing tag count data}},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=3341196{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {7},
- year = {2012}
- }
- @article{Smyth1999,
- abstract = {This paper considers double generalized linear models, which allow the mean and dispersion to be modelled simultaneously in a generalized linear model context. Estimation of the dispersion parameters is based on a $\chi^2_1$ approximation to the unit deviances, and the accuracy of the saddle-point approximation which underlies this is discussed. Approximate REML methods are developed for estimation of the dispersion. The approximate REML methods can be implemented with very little added complication in a generalized linear model setting by adjusting the working vector and working weights. S-Plus functions for double generalized linear models are described. Through two data examples it is shown that the approximate REML methods are more robust than maximum likelihood, in the sense of being less sensitive to perturbations in the mean model. Copyright {\textcopyright} 1999 John Wiley {\&} Sons, Ltd.},
- author = {Smyth, Gordon K. and Verbyla, Ar{\={u}}nas P.},
- doi = {10.1002/(SICI)1099-095X(199911/12)10:6<695::AID-ENV385>3.0.CO;2-M},
- file = {:Users/ryan/Documents/Mendeley Desktop/Smyth, Verbyla - 1999 - Adjusted likelihood methods for modelling dispersion in generalized linear models.pdf:pdf},
- issn = {1099-095X},
- journal = {Environmetrics},
- keywords = {adjusted profile likelihood,dispersion modelling,generalized linear models,reml,slippage models},
- number = {6},
- pages = {695--709},
- title = {{Adjusted likelihood methods for modelling dispersion in generalized linear models}},
- url = {http://onlinelibrary.wiley.com/doi/10.1002/(SICI)1099-095X(199911/12)10:6{\%}3C695::AID-ENV385{\%}3E3.0.CO;2-M/abstract},
- volume = {10},
- year = {1999}
- }
- @article{Li2010,
- abstract = {MOTIVATION: RNA-Seq is a promising new technology for accurately measuring gene expression levels. Expression estimation with RNA-Seq requires the mapping of relatively short sequencing reads to a reference genome or transcript set. Because reads are generally shorter than transcripts from which they are derived, a single read may map to multiple genes and isoforms, complicating expression analyses. Previous computational methods either discard reads that map to multiple locations or allocate them to genes heuristically. RESULTS: We present a generative statistical model and associated inference methods that handle read mapping uncertainty in a principled manner. Through simulations parameterized by real RNA-Seq data, we show that our method is more accurate than previous methods. Our improved accuracy is the result of handling read mapping uncertainty with a statistical model and the estimation of gene expression levels as the sum of isoform expression levels. Unlike previous methods, our method is capable of modeling non-uniform read distributions. Simulations with our method indicate that a read length of 20-25 bases is optimal for gene-level expression estimation from mouse and maize RNA-Seq data when sequencing throughput is fixed.},
- author = {Li, Bo and Ruotti, Victor and Stewart, Ron M. and Thomson, James A. and Dewey, Colin N.},
- doi = {10.1093/bioinformatics/btp692},
- file = {:Users/ryan/Documents/Mendeley Desktop/Li et al. - 2010 - RNA-Seq gene expression estimation with read mapping uncertainty.pdf:pdf},
- issn = {1367-4811},
- journal = {Bioinformatics},
- keywords = {Algorithms,Animals,Base Sequence,Computational Biology,Computational Biology: methods,Databases, Genetic,Gene Expression,Gene Expression Profiling,Genome,Mice,Sequence Analysis, RNA,Sequence Analysis, RNA: methods,Software,Zea mays,Zea mays: genetics},
- month = feb,
- number = {4},
- pages = {493--500},
- pmid = {20022975},
- title = {{RNA-Seq gene expression estimation with read mapping uncertainty}},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=2820677{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {26},
- year = {2010}
- }
- @article{Grant2011,
- abstract = {A critical task in high-throughput sequencing is aligning millions of short reads to a reference genome. Alignment is especially complicated for RNA sequencing (RNA-Seq) because of RNA splicing. A number of RNA-Seq algorithms are available, and claim to align reads with high accuracy and efficiency while detecting splice junctions. RNA-Seq data are discrete in nature; therefore, with reasonable gene models and comparative metrics RNA-Seq data can be simulated to sufficient accuracy to enable meaningful benchmarking of alignment algorithms. The exercise to rigorously compare all viable published RNA-Seq algorithms has not been performed previously.},
- author = {Grant, Gregory R. and Farkas, Michael H. and Pizarro, Angel D. and Lahens, Nicholas F. and Schug, Jonathan and Brunk, Brian P. and Stoeckert, Christian J. and Hogenesch, John B. and Pierce, Eric A.},
- doi = {10.1093/bioinformatics/btr427},
- file = {:Users/ryan/Documents/Mendeley Desktop/Grant et al. - 2011 - Comparative analysis of RNA-Seq alignment algorithms and the RNA-Seq unified mapper (RUM).pdf:pdf},
- issn = {1367-4811},
- journal = {Bioinformatics},
- keywords = {Algorithms,Animals,Base Sequence,Benchmarking,Cluster Analysis,Exons,Gene Library,Genome,High-Throughput Nucleotide Sequencing,Mice,Models, Genetic,Molecular Sequence Data,RNA,RNA Splicing,RNA: genetics,Sequence Alignment,Sequence Analysis, RNA,Sequence Analysis, RNA: methods,Software},
- month = sep,
- number = {18},
- pages = {2518--2528},
- pmid = {21775302},
- title = {{Comparative analysis of RNA-Seq alignment algorithms and the RNA-Seq unified mapper (RUM)}},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/21775302},
- volume = {27},
- year = {2011}
- }
- @article{Scott2015,
- author = {Scott, James G. and Kelly, Ryan C. and Smith, Matthew A. and Zhou, Pengcheng and Kass, Robert E.},
- doi = {10.1080/01621459.2014.990973},
- file = {:Users/ryan/Documents/Mendeley Desktop/Scott et al. - 2015 - False Discovery Rate Regression An Application to Neural Synchrony Detection in Primary Visual Cortex.pdf:pdf},
- issn = {0162-1459},
- journal = {Journal of the American Statistical Association},
- month = apr,
- number = {510},
- pages = {459--471},
- pmcid = {PMC4743052},
- title = {False Discovery Rate Regression: An Application to Neural Synchrony Detection in Primary Visual Cortex},
- url = {http://www.tandfonline.com/doi/full/10.1080/01621459.2014.990973},
- volume = {110},
- year = {2015}
- }
- @article{Kapourani2018a,
- abstract = {Measurements of DNA methylation at the single cell level are promising to revolutionise our understanding of epigenetic control of gene expression. Yet, intrinsic limitations of the technology result in very sparse coverage of CpG sites (around 5{\%} to 20{\%} coverage), effectively limiting the analysis repertoire to a semi-quantitative level. Here we introduce Melissa (MEthyLation Inference for Single cell Analysis), a Bayesian hierarchical method to quantify spatially-varying methylation profiles across genomic regions from single-cell bisulfite sequencing data (scBS-seq). Melissa clusters individual cells based on local methylation patterns, enabling the discovery of epigenetic differences and similarities among individual cells. The clustering also acts as an effective regularisation method for imputation of methylation on unassayed CpG sites, enabling transfer of information between individual cells. We show both on simulated and real data sets that Melissa provides accurate and biologically meaningful clusterings, and state-of-the-art imputation performance. An R implementation of Melissa is publicly available at https://github.com/andreaskapou/Melissa.},
- author = {Kapourani, Chantriolnt-Andreas and Sanguinetti, Guido},
- doi = {10.1101/312025},
- file = {:Users/ryan/Documents/Mendeley Desktop/Kapourani, Sanguinetti - 2018 - Melissa Bayesian clustering and imputation of single cell methylomes.pdf:pdf},
- journal = {bioRxiv},
- pages = {1--16},
- title = {{Melissa}: {Bayesian} clustering and imputation of single cell methylomes},
- url = {http://biorxiv.org/content/early/2018/05/01/312025.abstract},
- year = {2018}
- }
- % NOTE(review): the export had number = September and no volume or pages; volume 31, number 11, pages 1015--1022 were filled in from the published journal record. Confirm against the DOI. The month is left as exported (presumably the online-publication month).
- @article{tHoen2013,
- author = {{'t Hoen}, Peter A C and Friedl{\"{a}}nder, Marc R and Alml{\"{o}}f, Jonas and Sammeth, Michael and Pulyakhina, Irina and Anvar, Seyed Yahya and Laros, Jeroen F J and Buermans, Henk P J and Karlberg, Olof and Br{\"{a}}nnvall, Mathias and van Ommen, Gert-Jan B and Estivill, Xavier and Guig{\'{o}}, Roderic and Syv{\"{a}}nen, Ann-Christine and Gut, Ivo G and Dermitzakis, Emmanouil T and Antonarakis, Stylianos E and Brazma, Alvis and Flicek, Paul and Schreiber, Stefan and Rosenstiel, Philip and Meitinger, Thomas and Strom, Tim M and Lehrach, Hans and Sudbrak, Ralf and Carracedo, Angel and van Iterson, Maarten and Monlong, Jean and Lizano, Esther and Bertier, Gabrielle and Ferreira, Pedro G and Ribeca, Paolo and Griebel, Thasso and Beltran, Sergi and Gut, Marta and Kahlem, Katja and Lappalainen, Tuuli and Giger, Thomas and Ongen, Halit and Padioleau, Ismael and Kilpinen, Helena and Gonz{\`{a}}lez-Porta, Mar and Kurbatova, Natalja and Tikhonov, Andrew and Greger, Liliana and Barann, Matthias and Esser, Daniela and H{\"{a}}sler, Robert and Wieland, Thomas and Schwarzmayr, Thomas and Sultan, Marc and Amstislavskiy, Vyacheslav and den Dunnen, Johan T},
- doi = {10.1038/nbt.2702},
- file = {:Users/ryan/Documents/Mendeley Desktop/'t Hoen et al. - 2013 - Reproducibility of high-throughput mRNA and small RNA sequencing across laboratories.pdf:pdf},
- issn = {1087-0156},
- journal = {Nature Biotechnology},
- month = sep,
- number = {11},
- pages = {1015--1022},
- title = {Reproducibility of high-throughput {mRNA} and small {RNA} sequencing across laboratories},
- url = {http://www.nature.com/doifinder/10.1038/nbt.2702},
- volume = {31},
- year = {2013}
- }
- % NOTE(review): the third author was exported as "Bravo, Corrada" (surname split into first/last); restored as the compound surname Corrada Bravo with the given name taken from the preprint record. Confirm against the DOI.
- @article{Gunady2018,
- abstract = {Introduction: Analysis of differential alternative splicing from RNA-seq data is complicated by the fact that many RNA-seq reads map to multiple transcripts, besides, the annotated transcripts are often a small subset of the possible transcripts of a gene. Here we describe Yanagi, a tool for segmenting transcriptome to create a library of maximal L-disjoint segments from a complete transcriptome annotation. That segment library preserves all transcriptome substrings of length L and transcripts structural relationships while eliminating unnecessary sequence duplications. Contributions: In this paper, we formalize the concept of transcriptome segmentation and propose an efficient algorithm for generating segment libraries based on a length parameter dependent on specific RNA-Seq library construction. The resulting segment sequences can be used with pseudo-alignment tools to quantify expression at the segment level. We characterize the segment libraries for the reference transcriptomes of Drosophila melanogaster and Homo sapiens and provide gene-level visualization of the segments for better interpretability. Then we demonstrate the use of segments-level quantification into gene expression and alternative splicing analysis. The notion of transcript segmentation as introduced here and implemented in Yanagi opens the door for the application of lightweight, ultra-fast pseudo-alignment algorithms in a wide variety of RNA-seq analyses. Conclusion: Using segment library rather than the standard transcriptome succeeds in significantly reducing ambigious alignments where reads are multimapped to several sequences in the reference. That allowed avoiding the quantification step required by standard kmer-based pipelines for gene expression analysis. Moreover, using segment counts as statistics for alternative splicing analysis enables achieving comparable performance to counting-based approaches (e.g. rMATS) while rather using fast and lighthweight pseudo alignment.},
- author = {Gunady, Mohamed K and Mount, Stephen M and {Corrada Bravo}, H{\'{e}}ctor},
- doi = {10.1101/364281},
- file = {:Users/ryan/Documents/Mendeley Desktop/Gunady, Mount, Bravo - 2018 - Fast and interpretable alternative splicing and differential gene-level expression analysis using transcri.pdf:pdf},
- journal = {bioRxiv},
- pages = {1--23},
- title = {Fast and interpretable alternative splicing and differential gene-level expression analysis using transcriptome segmentation with {Yanagi}},
- url = {http://biorxiv.org/cgi/content/short/364281v1},
- year = {2018}
- }
- @article{Shen2012,
- abstract = {Ultra-deep RNA sequencing has become a powerful approach for genome-wide analysis of pre-mRNA alternative splicing. We develop MATS (multivariate analysis of transcript splicing), a bayesian statistical framework for flexible hypothesis testing of differential alternative splicing patterns on RNA-Seq data. MATS uses a multivariate uniform prior to model the between-sample correlation in exon splicing patterns, and a Markov chain Monte Carlo (MCMC) method coupled with a simulation-based adaptive sampling procedure to calculate the P-value and false discovery rate (FDR) of differential alternative splicing. Importantly, the MATS approach is applicable to almost any type of null hypotheses of interest, providing the flexibility to identify differential alternative splicing events that match a given user-defined pattern. We evaluated the performance of MATS using simulated and real RNA-Seq data sets. In the RNA-Seq analysis of alternative splicing events regulated by the epithelial-specific splicing factor ESRP1, we obtained a high RT-PCR validation rate of 86{\%} for differential exon skipping events with a MATS FDR of {\textless}10{\%}. Additionally, over the full list of RT-PCR tested exons, the MATS FDR estimates matched well with the experimental validation rate. Our results demonstrate that MATS is an effective and flexible approach for detecting differential alternative splicing from RNA-Seq data.},
- author = {Shen, Shihao and Park, Juw Won and Huang, Jian and Dittmar, Kimberly A and Lu, Zhi-xiang and Zhou, Qing and Carstens, Russ P and Xing, Yi},
- doi = {10.1093/nar/gkr1291},
- file = {:Users/ryan/Documents/Mendeley Desktop/Shen et al. - 2012 - MATS a Bayesian framework for flexible detection of differential alternative splicing from RNA-Seq data.pdf:pdf},
- issn = {1362-4962},
- journal = {Nucleic Acids Research},
- keywords = {Alternative Splicing,Bayes Theorem,Brain,Brain: metabolism,Cell Line, Tumor,High-Throughput Nucleotide Sequencing,Humans,Multivariate Analysis,RNA-Binding Proteins,RNA-Binding Proteins: metabolism,Reverse Transcriptase Polymerase Chain Reaction,Sequence Analysis, RNA},
- month = apr,
- number = {8},
- pages = {e61},
- pmid = {22266656},
- title = {{MATS}: a {Bayesian} framework for flexible detection of differential alternative splicing from {RNA-Seq} data},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=3333886{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {40},
- year = {2012}
- }
- @article{Shin2012,
- abstract = {Butyrate-induced histone acetylation plays an important role in the regulation of gene expression. However, the regulation mechanisms of histone modification remain largely unclear. To comprehensively analyze histone modification induced by butyrate, we utilized chromatin immunoprecipitation (ChIP) technology combined with next-generation sequencing technology (ChIP-seq) to analyze histone modification (acetylation) induced by butyrate and to map the epigenomic landscape of normal histone H3 and acetylated histone H3K9 and H3K27 on a large scale. To determine the location of histone H3, acetyl-H3K9, and acetyl-H3K27 binding sites within the bovine genome, we analyzed the H3-, acetyl-H3K9-, and acetyl-H3K27-enriched binding regions in the proximal promoter within 5 kb upstream, or at the 5' untranslated region (UTR) from the transcriptional start site (TSS), exon, intron, and intergenic regions (defined as regions 25 kb upstream or 10 kb downstream from the TSS). Our analysis indicated that the distribution of histone H3, acetyl-H3K9, and acetyl-H3K27 correlated with transcription activity induced by butyrate. Using the GADEM algorithm, several motifs were generated for each of the ChIP-seq datasets. A de novo search for H3, acetyl-H3K9, and acetyl-H3K27 binding motifs indicated that histone modification (acetylation) at various locations changes the histone H3 binding preferences. Our results reveal that butyrate-induced acetylation in H3K9 and H3K27 changes the sequence-based binding preference of histone H3 and underlies the potential mechanisms of gene expression regulation induced by butyrate.},
- author = {Shin, Joo Heon and Li, Robert W. and Gao, Yuan and Baldwin, VI, Ransom and Li, Cong Jun},
- doi = {10.1007/s10142-012-0263-6},
- file = {:Users/ryan/Documents/Mendeley Desktop/Shin et al. - 2012 - Genome-wide ChIP-seq mapping and analysis reveal butyrate-induced acetylation of H3K9 and H3K27 correlated with tra.pdf:pdf},
- issn = {1438-793X},
- journal = {Functional and Integrative Genomics},
- keywords = {Bovine,Butyrate,ChIP-seq,Epigenomics,Histone acetylation},
- number = {1},
- pages = {119--130},
- pmid = {22249597},
- title = {Genome-wide {ChIP-seq} mapping and analysis reveal butyrate-induced acetylation of {H3K9} and {H3K27} correlated with transcription activity in bovine cells},
- url = {https://link.springer.com/content/pdf/10.1007{\%}2Fs10142-012-0263-6.pdf},
- volume = {12},
- year = {2012}
- }
- @article{Fan2011,
- abstract = {The Affymetrix GeneChip Exon Array can be used to detect alternative splice variants. Microarray Detection of Alternative Splicing (MIDAS) and Partek({\textregistered}) Genomics Suite (Partek({\textregistered}) GS) are among the most popular analytical methods used to analyze exon array data. While both methods utilize statistical significance for testing, MIDAS and Partek({\textregistered}) GS could produce somewhat different results due to different underlying assumptions. Comparing MIDAS and Partek({\textregistered}) GS is quite difficult due to their substantially different mathematical formulations and assumptions regarding alternative splice variants. For meaningful comparison, we have used the previously published generalized probe model (GPM) which encompasses both MIDAS and Partek({\textregistered}) GS under different assumptions. We analyzed a colon cancer exon array data set using MIDAS, Partek({\textregistered}) GS and GPM. MIDAS and Partek({\textregistered}) GS produced quite different sets of genes that are considered to have alternative splice variants. Further, we found that GPM produced results similar to MIDAS as well as to Partek({\textregistered}) GS under their respective assumptions. Within the GPM, we show how discoveries relating to alternative variants can be quite different due to different assumptions. MIDAS focuses on relative changes in expression values across different exons within genes and tends to be robust but less efficient. Partek({\textregistered}) GS, however, uses absolute expression values of individual exons within genes and tends to be more efficient but more sensitive to the presence of outliers. From our observations, we conclude that MIDAS and Partek({\textregistered}) GS produce complementary results, and discoveries from both analyses should be considered.},
- author = {Fan, Wenhong and Stirewalt, Derek L and Radich, Jerald P and Zhao, Lueping},
- file = {:Users/ryan/Documents/Mendeley Desktop/Fan et al. - 2011 - Comparison of Two Methods for Detecting Alternative Splice Variants Using GeneChip({\textregistered}) Exon Arrays.pdf:pdf},
- issn = {1550-9702},
- journal = {International Journal of Biomedical Science},
- keywords = {alternative splicing,exon,gene expression analysis},
- month = sep,
- number = {3},
- pages = {172--180},
- pmid = {23675234},
- title = {Comparison of Two Methods for Detecting Alternative Splice Variants Using {GeneChip}({\textregistered}) Exon Arrays},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=3614835{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {7},
- year = {2011}
- }
- % Preprint of this article: bioRxiv, doi 10.1101/002824 (http://biorxiv.org/content/early/2014/08/19/002824.abstract). The exported url field held the preprint and journal links together; only the journal link is kept in the entry.
- @article{Anders2014,
- abstract = {Motivation: A large choice of tools exists for many standard tasks in the analysis of high-throughput sequencing (HTS) data. However, once a project deviates from standard work flows, custom scripts are needed. Results: We present HTSeq, a Python library to facilitate the rapid development of such scripts. HTSeq offers parsers for many common data formats in HTS projects, as well as classes to represent data such as genomic coordinates, sequences, sequencing reads, alignments, gene model information, variant calls, and provides data structures that allow for querying via genomic coordinates. We also present htseq-count, a tool developed with HTSeq that preprocesses RNA-Seq data for differential expression analysis by counting the overlap of reads with genes. Availability: HTSeq is released as open-source software under the GNU General Public Licence and available from http://www-huber.embl.de/HTSeq or from the Python Package Index, https://pypi.python.org/pypi/HTSeq},
- author = {Anders, Simon and Pyl, Paul Theodor and Huber, Wolfgang},
- doi = {10.1093/bioinformatics/btu638},
- file = {:Users/ryan/Documents/Mendeley Desktop/Anders, Pyl, Huber - 2015 - HTSeq--a Python framework to work with high-throughput sequencing data.pdf:pdf},
- issn = {1367-4803},
- journal = {Bioinformatics},
- month = jan,
- number = {2},
- pages = {166--169},
- pmid = {25260700},
- title = {{HTSeq}--a {Python} framework to work with high-throughput sequencing data},
- url = {http://bioinformatics.oxfordjournals.org/cgi/doi/10.1093/bioinformatics/btu638},
- volume = {31},
- year = {2015}
- }
- % NOTE(review): exported as a misc entry with the journal name in booktitle; retyped as an article. The exported url (nature.com/articles/tp2015169) does not match this entry's DOI, so it was replaced with the DOI resolver link.
- @article{Harmsen2017,
- abstract = {The unique spectral signatures and biologically inert compositions of surface-enhanced resonance Raman scattering (SERRS) nanoparticles make them promising contrast agents for in vivo cancer imaging. Our SERRS nanoparticles consist of a 60-nm gold nanoparticle core that is encapsulated in a 15-nm-thick silica shell wherein the resonant Raman reporter is embedded. Subtle aspects of their preparation can shift their limit of detection by orders of magnitude. In this protocol, we present the optimized, step-by-step procedure for generating reproducible SERRS nanoparticles with femtomolar (10(-15) M) limits of detection. We provide ways of characterizing the optical properties of SERRS nanoparticles using UV/VIS and Raman spectroscopy, and their physicochemical properties using transmission electron microscopy and nanoparticle tracking analysis. We introduce several applications of these nanoprobes for biomedical research, with a focus on intraoperative cancer imaging via Raman imaging. A detailed account is provided for successful i.v. administration of SERRS nanoparticles such that delineation of cancerous lesions can be achieved in vivo and ex vivo on resected tissues without the need for specific biomarker targeting. This straightforward, yet comprehensive, protocol-from initial de novo gold nanoparticle synthesis to SERRS nanoparticle contrast-enhanced preclinical Raman imaging in animal models-takes $\sim$96 h.},
- author = {Harmsen, Stefan and Wall, Matthew A. and Huang, Ruimin and Kircher, Moritz F.},
- doi = {10.1038/nprot.2017.031},
- file = {:Users/ryan/Documents/Mendeley Desktop/Harmsen et al. - 2017 - Cancer imaging using surface-enhanced resonance Raman scattering nanoparticles.pdf:pdf},
- issn = {1750-2799},
- journal = {Nature Protocols},
- number = {7},
- pages = {1400--1414},
- pmid = {28686581},
- title = {Cancer imaging using surface-enhanced resonance {Raman} scattering nanoparticles},
- url = {https://doi.org/10.1038/nprot.2017.031},
- volume = {12},
- year = {2017}
- }
- % NOTE(review): the exported pages value 1--10 looks like a PDF page count; replaced with the article number 954 that appears in the DOI. Confirm against the journal record.
- @article{Choi2014,
- abstract = {BACKGROUND: Transcriptome analysis of porcine whole blood has several applications, which include deciphering genetic mechanisms for host responses to viral infection and vaccination. The abundance of alpha- and beta-globin transcripts in blood, however, impedes the ability to cost-effectively detect transcripts of low abundance. Although protocols exist for reduction of globin transcripts from human and mouse/rat blood, preliminary work demonstrated these are not useful for porcine blood Globin Reduction (GR). Our objectives were to develop a porcine specific GR protocol and to evaluate the GR effects on gene discovery and sequence read coverage in RNA-sequencing (RNA-seq) experiments. RESULTS: A GR protocol for porcine blood samples was developed using RNase H with antisense oligonucleotides specifically targeting porcine hemoglobin alpha (HBA) and beta (HBB) mRNAs. Whole blood samples (n = 12) collected in Tempus tubes were used for evaluating the efficacy and effects of GR on RNA-seq. The HBA and HBB mRNA transcripts comprised an average of 46.1{\%} of the mapped reads in pre-GR samples, but those reads reduced to an average of 8.9{\%} in post-GR samples. Differential gene expression analysis showed that the expression level of 11,046 genes were increased, whereas 34 genes, excluding HBA and HBB, showed decreased expression after GR (FDR {\textless}0.05). An additional 815 genes were detected only in post-GR samples. CONCLUSIONS: Our porcine specific GR primers and protocol minimize the number of reads of globin transcripts in whole blood samples and provides increased coverage as well as accuracy and reproducibility of transcriptome analysis. Increased detection of low abundance mRNAs will ensure that studies relying on transcriptome analyses do not miss information that may be vital to the success of the study.},
- author = {Choi, Igseo and Bao, Hua and Kommadath, Arun and Hosseini, Afshin and Sun, Xu and Meng, Yan and Stothard, Paul and Plastow, Graham S. and Tuggle, Christopher K. and Reecy, James M. and Fritz-Waters, Eric and Abrams, Samuel M. and Lunney, Joan K. and Guan, Le Luo},
- doi = {10.1186/1471-2164-15-954},
- file = {:Users/ryan/Documents/Mendeley Desktop/Choi et al. - 2014 - Increasing gene discovery and coverage using RNA-seq of globin RNA reduced porcine blood samples.pdf:pdf},
- issn = {1471-2164},
- journal = {BMC Genomics},
- keywords = {Blood,Globin reduction,Pig,RNA-seq,Transcriptome},
- number = {1},
- pages = {954},
- title = {Increasing gene discovery and coverage using {RNA-seq} of globin {RNA} reduced porcine blood samples},
- url = {https://bmcgenomics.biomedcentral.com/track/pdf/10.1186/1471-2164-15-954},
- volume = {15},
- year = {2014}
- }
- % NOTE(review): the exported pages value 1--14 looks like a PDF page count; replaced with the journal page range 10084--10097 from the published record. Confirm against the DOI. The month is left as exported (presumably the online-publication month).
- @article{Nookaew2012,
- abstract = {RNA-seq, has recently become an attractive method of choice in the studies of transcriptomes, promising several advantages compared with microarrays. In this study, we sought to assess the contribution of the different analytical steps involved in the analysis of RNA-seq data generated with the Illumina platform, and to perform a cross-platform comparison based on the results obtained through Affymetrix microarray. As a case study for our work we, used the Saccharomyces cerevisiae strain CEN.PK 113-7D, grown under two different conditions (batch and chemostat). Here, we asses the influence of genetic variation on the estimation of gene expression level using three different aligners for read-mapping (Gsnap, Stampy and TopHat) on S288c genome, the capabilities of five different statistical methods to detect differential gene expression (baySeq, Cuffdiff, DESeq, edgeR and NOISeq) and we explored the consistency between RNA-seq analysis using reference genome and de novo assembly approach. High reproducibility among biological replicates (correlation $\geq$0.99) and high consistency between the two platforms for analysis of gene expression levels (correlation $\geq$0.91) are reported. The results from differential gene expression identification derived from the different statistical methods, as well as their integrated analysis results based on gene ontology annotation are in good agreement. Overall, our study provides a useful and comprehensive comparison between the two platforms (RNA-seq and microrrays) for gene expression analysis and addresses the contribution of the different steps involved in the analysis of RNA-seq data.},
- author = {Nookaew, Intawat and Papini, Marta and Pornputtpong, Natapol and Scalcinati, Gionata and Fagerberg, Linn and Uhl{\'{e}}n, Matthias and Nielsen, Jens},
- doi = {10.1093/nar/gks804},
- file = {:Users/ryan/Documents/Mendeley Desktop/Nookaew et al. - 2012 - A comprehensive comparison of RNA-Seq-based transcriptome analysis from reads to differential gene expression an.pdf:pdf},
- issn = {1362-4962},
- journal = {Nucleic Acids Research},
- month = sep,
- number = {20},
- pages = {10084--10097},
- pmid = {22965124},
- title = {A comprehensive comparison of {RNA-Seq-based} transcriptome analysis from reads to differential gene expression and cross-comparison with microarrays: a case study in {Saccharomyces cerevisiae}},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/22965124},
- volume = {40},
- year = {2012}
- }
- @article{Smyth2005a,
- abstract = {MOTIVATION: Spotted arrays are often printed with probes in duplicate or triplicate, but current methods for assessing differential expression are not able to make full use of the resulting information. The usual practice is to average the duplicate or triplicate results for each probe before assessing differential expression. This results in the loss of valuable information about genewise variability.
RESULTS: A method is proposed for extracting more information from within-array replicate spots in microarray experiments by estimating the strength of the correlation between them. The method involves fitting separate linear models to the expression data for each gene but with a common value for the between-replicate correlation. The method greatly improves the precision with which the genewise variances are estimated and thereby improves inference methods designed to identify differentially expressed genes. The method may be combined with empirical Bayes methods for moderating the genewise variances between genes. The method is validated using data from a microarray experiment involving calibration and ratio control spots in conjunction with spiked-in RNA. Comparing results for calibration and ratio control spots shows that the common correlation method results in substantially better discrimination of differentially expressed genes from those which are not. The spike-in experiment also confirms that the results may be further improved by empirical Bayes smoothing of the variances when the sample size is small.
AVAILABILITY: The methodology is implemented in the limma software package for R, available from the CRAN repository http://www.r-project.org},
- author = {Smyth, Gordon K and Michaud, Jo{\"{e}}lle and Scott, Hamish S},
- doi = {10.1093/bioinformatics/bti270},
- file = {:Users/ryan/Documents/Mendeley Desktop/Smyth, Michaud, Scott - 2005 - Use of within-array replicate spots for assessing differential expression in microarray experiments.pdf:pdf},
- issn = {1367-4803},
- journal = {Bioinformatics},
- keywords = {Algorithms,Gene Expression Profiling,Gene Expression Profiling: methods,Genetic Variation,Genetic Variation: genetics,In Situ Hybridization, Fluorescence,In Situ Hybridization, Fluorescence: methods,Linear Models,Models, Genetic,Models, Statistical,Oligonucleotide Array Sequence Analysis,Oligonucleotide Array Sequence Analysis: methods,RNA, Messenger,RNA, Messenger: genetics,Software},
- month = may,
- number = {9},
- pages = {2067--2075},
- pmid = {15657102},
- title = {Use of within-array replicate spots for assessing differential expression in microarray experiments},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/15657102},
- volume = {21},
- year = {2005}
- }
- @article{Teng2016,
- abstract = {ChIP-seq technology is widely used in biomedical and basic science research. The main application is the detection of genomic regions that bind to a protein of interest. ChIP-Seq studies rely on peak calling algorithms that attempt to infer protein-binding sites by detecting genomic regions associated with more mapped reads (coverage) than expected by chance as a result of the experimental protocol's lack of perfect specificity. We find that GC-content bias accounts for a substantial amount of variability in the observed coverage for ChIP-Seq experiments and that this variability leads to false positive peak calls. More concerning is that the GC-effect varies across experiments, with the effect strong enough to result in a substantial number of peaks called differently when different laboratories perform experiments on the same cell-line. Although solutions have been proposed for GC-bias corrections in other Next Generation Sequencing (NGS) applications, accounting for GC-content in ChIP-Seq data is challenging because the binding sites of interest tend to be more common in high GC-content regions, which confounds real biological signal with the unwanted variability we want to remove. To account for this challenge we introduce a statistical approach, based on a mixture model, that accounts for GC-content effects on both non-specific noise and signal induced by the binding site we seek to detect. The method can be used to account for this bias in binding quantification as well to improve existing peak calling algorithms. We use this approach to show improved consistency across laboratories.},
- author = {Teng, Mingxiang and Irizarry, Rafael A.},
- doi = {10.1101/090704},
- file = {:Users/ryan/Documents/Mendeley Desktop/Teng, Irizarry - 2016 - Accounting for GC-content bias reduces systematic errors and batch effects in ChIP-Seq peak callers.pdf:pdf},
- journal = {bioRxiv},
- title = {Accounting for {GC-content} bias reduces systematic errors and batch effects in {ChIP-Seq} peak callers},
- url = {http://biorxiv.org/content/biorxiv/early/2016/12/01/090704.full.pdf},
- year = {2016}
- }
- @article{Stegle2010a,
- author = {Stegle, Oliver and Parts, Leopold and Durbin, Richard and Winn, John},
- doi = {10.1371/journal.pcbi.1000770},
- file = {:Users/ryan/Documents/Mendeley Desktop/Stegle et al. - 2010 - A Bayesian Framework to Account for Complex Non-Genetic Factors in Gene Expression Levels Greatly Increases Power.pdf:pdf;:Users/ryan/Documents/Mendeley Desktop/Stegle et al. - 2010 - A Bayesian Framework to Account for Complex Non-Genetic Factors in Gene Expression Levels Greatly Increases Po(2).pdf:pdf},
- issn = {1553-7358},
- journal = {PLoS Computational Biology},
- number = {5},
- pages = {e1000770},
- title = {A {Bayesian} Framework to Account for Complex Non-Genetic Factors in Gene Expression Levels Greatly Increases Power in {eQTL} Studies},
- url = {http://dx.plos.org/10.1371/journal.pcbi.1000770},
- volume = {6},
- year = {2010}
- }
- @article{Soneson2013,
- abstract = {BACKGROUND: Finding genes that are differentially expressed between conditions is an integral part of understanding the molecular basis of phenotypic variation. In the past decades, DNA microarrays have been used extensively to quantify the abundance of mRNA corresponding to different genes, and more recently high-throughput sequencing of cDNA (RNA-seq) has emerged as a powerful competitor. As the cost of sequencing decreases, it is conceivable that the use of RNA-seq for differential expression analysis will increase rapidly. To exploit the possibilities and address the challenges posed by this relatively new type of data, a number of software packages have been developed especially for differential expression analysis of RNA-seq data.
RESULTS: We conducted an extensive comparison of eleven methods for differential expression analysis of RNA-seq data. All methods are freely available within the R framework and take as input a matrix of counts, i.e. the number of reads mapping to each genomic feature of interest in each of a number of samples. We evaluate the methods based on both simulated data and real RNA-seq data.
CONCLUSIONS: Very small sample sizes, which are still common in RNA-seq experiments, impose problems for all evaluated methods and any results obtained under such conditions should be interpreted with caution. For larger sample sizes, the methods combining a variance-stabilizing transformation with the 'limma' method for differential expression analysis perform well under many different conditions, as does the nonparametric SAMseq method.},
- author = {Soneson, Charlotte and Delorenzi, Mauro},
- doi = {10.1186/1471-2105-14-91},
- file = {:Users/ryan/Documents/Mendeley Desktop/Soneson, Delorenzi - 2013 - A comparison of methods for differential expression analysis of RNA-seq data.pdf:pdf},
- issn = {1471-2105},
- journal = {BMC Bioinformatics},
- keywords = {differential expression,gene expression,rna-seq},
- month = jan,
- number = {1},
- pages = {91},
- pmid = {23497356},
- title = {A comparison of methods for differential expression analysis of {RNA-seq} data},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=3608160{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {14},
- year = {2013}
- }
- @article{Zaykin2011,
- abstract = {The inverse normal and Fisher's methods are two common approaches for combining P-values. Whitlock demonstrated that a weighted version of the inverse normal method, or 'weighted Z-test', is superior to Fisher's method for combining P-values for one-sided T-tests. The problem with Fisher's method is that it does not take advantage of weighting and loses power to the weighted Z-test when studies are differently sized. This issue was recently revisited by Chen, who observed that Lancaster's variation of Fisher's method had higher power than the weighted Z-test. Nevertheless, the weighted Z-test has comparable power to Lancaster's method when its weights are set to square roots of sample sizes. Power can be further improved when additional information is available. Although there is no single approach that is the best in every situation, the weighted Z-test enjoys certain properties that make it an appealing choice as a combination method for meta-analysis.},
- author = {Zaykin, Dmitri V.},
- doi = {10.1111/j.1420-9101.2011.02297.x},
- file = {:Users/ryan/Documents/Mendeley Desktop/Zaykin - 2011 - Optimally weighted Z-test is a powerful method for combining probabilities in meta-analysis.pdf:pdf},
- issn = {1010-061X},
- journal = {Journal of Evolutionary Biology},
- keywords = {Combining P-values,Meta-analysis},
- number = {8},
- pages = {1836--1841},
- pmid = {21605215},
- title = {{Optimally weighted Z-test is a powerful method for combining probabilities in meta-analysis}},
- url = {https://doi.org/10.1111/j.1420-9101.2011.02297.x},
- volume = {24},
- year = {2011}
- }
- @article{Stanke2006,
- abstract = {In order to improve gene prediction, extrinsic evidence on the gene structure can be collected from various sources of information such as genome-genome comparisons and EST and protein alignments. However, such evidence is often incomplete and usually uncertain. The extrinsic evidence is usually not sufficient to recover the complete gene structure of all genes completely and the available evidence is often unreliable. Therefore extrinsic evidence is most valuable when it is balanced with sequence-intrinsic evidence.},
- author = {Stanke, Mario and Sch{\"{o}}ffmann, Oliver and Morgenstern, Burkhard and Waack, Stephan},
- doi = {10.1186/1471-2105-7-62},
- file = {:Users/ryan/Documents/Mendeley Desktop/Stanke et al. - 2006 - Gene prediction in eukaryotes with a generalized hidden Markov model that uses hints from external sources.pdf:pdf},
- issn = {1471-2105},
- journal = {BMC Bioinformatics},
- keywords = {Algorithms,Animals,Artificial Intelligence,Base Sequence,Chromosome Mapping,Chromosome Mapping: methods,Computer Simulation,Databases, Genetic,Genetic Variation,Genetic Variation: genetics,Humans,Information Storage and Retrieval,Information Storage and Retrieval: methods,Markov Chains,Models, Genetic,Models, Statistical,Molecular Sequence Data,Pattern Recognition, Automated,Sequence Alignment,Sequence Alignment: methods,Sequence Analysis, DNA,Sequence Analysis, DNA: methods,Stochastic Processes},
- month = jan,
- pages = {62},
- pmid = {16469098},
- title = {{Gene prediction in eukaryotes with a generalized hidden Markov model that uses hints from external sources}},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=1409804{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {7},
- year = {2006}
- }
- @article{Aponte2011,
- abstract = {Two intermingled hypothalamic neuron populations specified by expression of agouti-related peptide (AGRP) or pro-opiomelanocortin (POMC) positively and negatively influence feeding behavior, respectively, possibly by reciprocally regulating downstream melanocortin receptors. However, the sufficiency of these neurons to control behavior and the relationship of their activity to the magnitude and dynamics of feeding are unknown. To measure this, we used channelrhodopsin-2 for cell type-specific photostimulation. Activation of only 800 AGRP neurons in mice evoked voracious feeding within minutes. The behavioral response increased with photoexcitable neuron number, photostimulation frequency and stimulus duration. Conversely, POMC neuron stimulation reduced food intake and body weight, which required melanocortin receptor signaling. However, AGRP neuron-mediated feeding was not dependent on suppressing this melanocortin pathway, indicating that AGRP neurons directly engage feeding circuits. Furthermore, feeding was evoked selectively over drinking without training or prior photostimulus exposure, which suggests that AGRP neurons serve a dedicated role coordinating this complex behavior.},
- author = {Aponte, Yexica and Atasoy, Deniz and Sternson, Scott M},
- doi = {10.1038/nn.2739},
- file = {:Users/ryan/Documents/Mendeley Desktop/Aponte, Atasoy, Sternson - 2011 - AGRP neurons are sufficient to orchestrate feeding behavior rapidly and without training.pdf:pdf},
- issn = {1546-1726},
- journal = {Nature Neuroscience},
- keywords = {Agouti-Related Protein,Agouti-Related Protein: genetics,Agouti-Related Protein: metabolism,Animal,Animal: physiology,Animals,Behavior,Classical,Classical: physiology,Conditioning,Eating,Feeding Behavior,Feeding Behavior: physiology,Hypothalamus,Hypothalamus: cytology,Hypothalamus: metabolism,Melanocortins,Melanocortins: metabolism,Mice,Neurons,Neurons: metabolism,Photic Stimulation,Pro-Opiomelanocortin,Pro-Opiomelanocortin: metabolism,Recombinant Fusion Proteins,Recombinant Fusion Proteins: genetics,Recombinant Fusion Proteins: metabolism,Rhodopsin,Rhodopsin: genetics,Rhodopsin: metabolism},
- month = mar,
- number = {3},
- pages = {351--355},
- pmid = {21209617},
- title = {{AGRP neurons are sufficient to orchestrate feeding behavior rapidly and without training}},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=3049940{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {14},
- year = {2011}
- }
- @article{Dabney2007,
- abstract = {Nearest-centroid classifiers have recently been successfully employed in high-dimensional applications, such as in genomics. A necessary step when building a classifier for high-dimensional data is feature selection. Feature selection is frequently carried out by computing univariate scores for each feature individually, without consideration for how a subset of features performs as a whole. We introduce a new feature selection approach for high-dimensional nearest centroid classifiers that instead is based on the theoretically optimal choice of a given number of features, which we determine directly here. This allows us to develop a new greedy algorithm to estimate this optimal nearest-centroid classifier with a given number of features. In addition, whereas the centroids are usually formed from maximum likelihood estimates, we investigate the applicability of high-dimensional shrinkage estimates of centroids. We apply the proposed method to clinical classification based on gene-expression microarrays, demonstrating that the proposed method can outperform existing nearest centroid classifiers.},
- author = {Dabney, Alan R and Storey, John D},
- doi = {10.1371/journal.pone.0001002},
- editor = {Zhu, Ji},
- file = {:Users/ryan/Documents/Mendeley Desktop/Dabney, Storey - 2007 - Optimality driven nearest centroid classification from genomic data.pdf:pdf},
- issn = {1932-6203},
- journal = {PLoS ONE},
- keywords = {Algorithms,Automated,Child,Data Interpretation,Discriminant Analysis,Gene Expression Profiling,Gene Expression Regulation,Genetic Techniques,Genomics,Humans,Leukemia,Lymphoma,Lymphoma: genetics,Models,Neoplastic,Oligonucleotide Array Sequence Analysis,Pattern Recognition,Statistical,Theoretical},
- month = oct,
- number = {10},
- pages = {e1002},
- pmid = {17912341},
- title = {{Optimality Driven Nearest Centroid Classification from Genomic Data}},
- url = {http://dx.plos.org/10.1371/journal.pone.0001002},
- volume = {2},
- year = {2007}
- }
- @article{Aschoff2013,
- abstract = {MOTIVATION: Alternative splicing is central for cellular processes and substantially increases transcriptome and proteome diversity. Aberrant splicing events often have pathological consequences and are associated with various diseases and cancer types. The emergence of next-generation RNA sequencing (RNA-seq) provides an exciting new technology to analyse alternative splicing on a large scale. However, algorithms that enable the analysis of alternative splicing from short-read sequencing are not fully established yet and there are still no standard solutions available for a variety of data analysis tasks.
- RESULTS: We present a new method and software to predict genes that are differentially spliced between two different conditions using RNA-seq data. Our method uses geometric angles between the high dimensional vectors of exon read counts. With this, differential splicing can be detected even if the splicing events are composed of higher complexity and involve previously unknown splicing patterns. We applied our approach to two case studies including neuroblastoma tumour data with favourable and unfavourable clinical courses. We show the validity of our predictions as well as the applicability of our method in the context of patient clustering. We verified our predictions by several methods including simulated experiments and complementary in silico analyses. We found a significant number of exons with specific regulatory splicing factor motifs for predicted genes and a substantial number of publications linking those genes to alternative splicing. Furthermore, we could successfully exploit splicing information to cluster tissues and patients. Finally, we found additional evidence of splicing diversity for many predicted genes in normalized read coverage plots and in reads that span exon-exon junctions.
- AVAILABILITY: SplicingCompass is licensed under the GNU GPL and freely available as a package in the statistical language R at http://www.ichip.de/software/SplicingCompass.html},
- author = {Aschoff, Moritz and Hotz-Wagenblatt, Agnes and Glatting, Karl-Heinz and Fischer, Matthias and Eils, Roland and K{\"{o}}nig, Rainer},
- doi = {10.1093/bioinformatics/btt101},
- file = {:Users/ryan/Documents/Mendeley Desktop/Aschoff et al. - 2013 - SplicingCompass differential splicing detection using RNA-seq data.pdf:pdf},
- issn = {1367-4811},
- journal = {Bioinformatics},
- month = may,
- number = {9},
- pages = {1141--1148},
- pmid = {23449093},
- title = {{SplicingCompass: differential splicing detection using RNA-seq data}},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/23449093},
- volume = {29},
- year = {2013}
- }
- @article{Wu2010,
- author = {Wu, Thomas D. and Nacu, Serban},
- doi = {10.1093/bioinformatics/btq057},
- file = {:Users/ryan/Documents/Mendeley Desktop/Wu, Nacu - 2010 - Fast and SNP-tolerant detection of complex variants and splicing in short reads(2).pdf:pdf},
- journal = {Bioinformatics},
- number = {7},
- pages = {873--881},
- title = {{Fast and SNP-tolerant detection of complex variants and splicing in short reads}},
- url = {http://bioinformatics.oxfordjournals.org/content/26/7/873.short},
- volume = {26},
- year = {2010}
- }
- @article{Marin-Bejar2013,
- abstract = {BACKGROUND The p53 transcription factor is located at the core of a complex wiring of signaling pathways that are critical for the preservation of cellular homeostasis. Only recently it has become clear that p53 regulates the expression of several long intergenic noncoding RNAs (lincRNAs). However, relatively little is known about the role that lincRNAs play in this pathway. RESULTS Here we characterize a lincRNA named Pint (p53 induced noncoding transcript). We show that Pint is a ubiquitously expressed lincRNA that is finely regulated by p53. In mouse cells, Pint promotes cell proliferation and survival by regulating the expression of genes of the TGF-b, MAPK and p53 pathways. Pint is a nuclear lincRNA that directly interacts with the Polycomb repressive complex 2 (PRC2), and is required for PRC2 targeting of specific genes for H3K27 tri-methylation and repression. Furthermore, Pint functional activity is highly dependent on PRC2 expression. We have also identified Pint human ortholog (PINT), which presents suggestive analogies with the murine lincRNA. PINT is similarly regulated by p53, and its expression significantly correlates with the same cellular pathways as the mouse ortholog, including the p53 pathway. Interestingly, PINT is downregulated in colon primary tumors, while its overexpression inhibits the proliferation of tumor cells, suggesting a possible role as tumor suppressor. CONCLUSIONS Our results reveal a p53 autoregulatory negative mechanism where a lincRNA connects p53 activation with epigenetic silencing by PRC2. Additionally, we show analogies and differences between the murine and human orthologs, identifying a novel tumor suppressor candidate lincRNA.},
- author = {Mar{\'{i}}n-B{\'{e}}jar, Oskar and Marchese, Francesco P and Athie, Alejandro and S{\'{a}}nchez, Yolanda and Gonz{\'{a}}lez, Jovanna and Segura, Victor and Huang, Lulu and Moreno, Isabel and Navarro, Alfons and Monz{\'{o}}, Mariano and Garc{\'{i}}a-Foncillas, Jes{\'{u}}s and Rinn, John L and Guo, Shuling and Huarte, Maite},
- doi = {10.1186/gb-2013-14-9-r104},
- file = {:Users/ryan/Documents/Mendeley Desktop/Mar{\'{i}}n-B{\'{e}}jar et al. - 2013 - Pint lincRNA connects the p53 pathway with epigenetic silencing by the Polycomb repressive complex 2.pdf:pdf},
- issn = {1474-760X},
- journal = {Genome Biology},
- keywords = {gene regulation,lincrna,non-coding rna,p53,polycomb repressive complex 2},
- number = {9},
- pages = {R104},
- pmid = {24070194},
- publisher = {BioMed Central Ltd},
- title = {{Pint lincRNA connects the p53 pathway with epigenetic silencing by the Polycomb repressive complex 2}},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/24070194},
- volume = {14},
- year = {2013}
- }
- @misc{Bekiranov2009,
- author = {Bekiranov, Stefan},
- file = {:Users/ryan/Documents/Mendeley Desktop/Bekiranov - 2009 - Introduction to ChIP-Seq Analysis using the SPP Package Read in Eland aligment.pdf:pdf},
- howpublished = {Presentation, University of Virginia},
- keywords = {presentation},
- mendeley-tags = {presentation},
- pages = {1--8},
- title = {{Introduction to ChIP-Seq Analysis using the SPP Package Read in Eland aligment}},
- year = {2009}
- }
- @article{Marioni2008,
- abstract = {Ultra-high-throughput sequencing is emerging as an attractive alternative to microarrays for genotyping, analysis of methylation patterns, and identification of transcription factor binding sites. Here, we describe an application of the Illumina sequencing (formerly Solexa sequencing) platform to study mRNA expression levels. Our goals were to estimate technical variance associated with Illumina sequencing in this context and to compare its ability to identify differentially expressed genes with existing array technologies. To do so, we estimated gene expression differences between liver and kidney RNA samples using multiple sequencing replicates, and compared the sequencing data to results obtained from Affymetrix arrays using the same RNA samples. We find that the Illumina sequencing data are highly replicable, with relatively little technical variation, and thus, for many purposes, it may suffice to sequence each mRNA sample only once (i.e., using one lane). The information in a single lane of Illumina sequencing data appears comparable to that in a single array in enabling identification of differentially expressed genes, while allowing for additional analyses such as detection of low-expressed genes, alternative splice variants, and novel transcripts. Based on our observations, we propose an empirical protocol and a statistical framework for the analysis of gene expression using ultra-high-throughput sequencing technology.},
- author = {Marioni, John C and Mason, Christopher E and Mane, Shrikant M and Stephens, Matthew and Gilad, Yoav},
- doi = {10.1101/gr.079558.108},
- file = {:Users/ryan/Documents/Mendeley Desktop/Marioni et al. - 2008 - RNA-seq an assessment of technical reproducibility and comparison with gene expression arrays.pdf:pdf},
- issn = {1088-9051},
- journal = {Genome Research},
- keywords = {Gene Expression Profiling,Gene Expression Profiling: methods,Humans,Likelihood Functions,Male,Models, Biological,Oligonucleotide Array Sequence Analysis,Oligonucleotide Array Sequence Analysis: methods,RNA, Messenger,RNA, Messenger: chemistry,RNA, Messenger: metabolism,Reproducibility of Results,Sequence Analysis, RNA,Sequence Analysis, RNA: methods},
- month = sep,
- number = {9},
- pages = {1509--1517},
- pmid = {18550803},
- title = {{RNA-seq: an assessment of technical reproducibility and comparison with gene expression arrays}},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=2527709{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {18},
- year = {2008}
- }
- @article{Maksimovic2012,
-   author   = {Maksimovic, Jovana and Gordon, Lavinia and Oshlack, Alicia},
-   title    = {{SWAN: Subset-quantile Within Array Normalization for Illumina Infinium HumanMethylation450 BeadChips}},
-   journal  = {Genome Biology},
-   volume   = {13},
-   number   = {6},
-   pages    = {R44},
-   year     = {2012},
-   doi      = {10.1186/gb-2012-13-6-r44},
-   issn     = {1465-6906},
-   url      = {https://genomebiology.biomedcentral.com/track/pdf/10.1186/gb-2012-13-6-r44},
-   abstract = {DNA methylation is the most widely studied epigenetic mark and is known to be essential to normal development and frequently disrupted in disease. The Illumina HumanMethylation450 BeadChip assays the methylation status of CpGs at 485,577 sites across the genome. Here we present Subset-quantile Within Array Normalization (SWAN), a new method that substantially improves the results from this platform by reducing technical variation within and between arrays. SWAN is available in the minfi Bioconductor package.},
-   file     = {:Users/ryan/Documents/Mendeley Desktop/Maksimovic, Gordon, Oshlack - 2012 - SWAN Subset-quantile Within Array Normalization for Illumina Infinium HumanMethylation450 BeadChips.pdf:pdf}
- }
- @article{Schneider2017,
-   author   = {Schneider, Valerie A. and Bouk, Nathan and Chen, Hsiu-Chuan and Kitts, Paul A. and Murphy, Terence D. and Pruitt, Kim D. and Thibaud-Nissen, Fran{\c{c}}oise and Church, Deanna M. and Graves-Lindsay, Tina and Albracht, Derek and Fulton, Robert S. and Kremitzki, Milinn and Magrini, Vincent and Markovic, Chris and McGrath, Sean and Steinberg, Karyn Meltz and Wilson, Richard K. and Howe, Kerstin and Auger, Kate and Chow, William and Collins, Joanna and Harden, Glenn and Hubbard, Timothy and Pelan, Sarah and Simpson, Jared T. and Threadgold, Glen and Torrance, James and Wood, Jonathan M. and Durbin, Richard and Clarke, Laura and Flicek, Paul and Koren, Sergey and Phillippy, Adam M. and Boitano, Matthew and Peluso, Paul and Chin, Chen-Shan and Li, Heng and Eichler, Evan E.},
-   title    = {{Evaluation of GRCh38 and de novo haploid genome assemblies demonstrates the enduring quality of the reference assembly}},
-   journal  = {Genome Research},
-   volume   = {27},
-   number   = {5},
-   pages    = {849--864},
-   year     = {2017},
-   doi      = {10.1101/gr.213611.116},
-   issn     = {1549-5469},
-   url      = {https://genome.cshlp.org/content/27/5/849.full.pdf},
-   keywords = {annotation,bioinformatics,biology,clinical research,genetics,genomics,haplotype,human biology,locus,ranging,reference genome},
-   file     = {:Users/ryan/Documents/Mendeley Desktop/Schneider et al. - 2017 - Evaluation of GRCh38 and de novo haploid genome assemblies demonstrates the enduring quality of the reference.pdf:pdf}
- }
- @article{Teschendorff2011,
- abstract = {A common difficulty in large-scale microarray studies is the presence of confounding factors, which may significantly skew estimates of statistical significance, cause unreliable feature selection and high false negative rates. To deal with these difficulties, an algorithmic framework known as Surrogate Variable Analysis (SVA) was recently proposed.},
- author = {Teschendorff, Andrew E. and Zhuang, Joanna and Widschwendter, Martin},
- doi = {10.1093/bioinformatics/btr171},
- file = {:Users/ryan/Documents/Mendeley Desktop/Teschendorff, Zhuang, Widschwendter - 2011 - Independent surrogate variable analysis to deconvolve confounding factors in large-scale mi.pdf:pdf},
- issn = {1460-2059},
- journal = {Bioinformatics},
- month = jun,
- number = {11},
- pages = {1496--1505},
- pmid = {21471010},
- title = {{Independent surrogate variable analysis to deconvolve confounding factors in large-scale microarray profiling studies}},
- url = {https://academic.oup.com/bioinformatics/article-lookup/doi/10.1093/bioinformatics/btr171},
- volume = {27},
- year = {2011}
- }
- @article{Kent2003a,
- author = {Kent, W James and Baertsch, Robert and Hinrichs, Angie and Miller, Webb and Haussler, David},
- doi = {10.1073/pnas.1932072100},
- file = {:Users/ryan/Documents/Mendeley Desktop/Kent et al. - 2003 - Evolution ' s cauldron Duplication , deletion , and rearrangement in the mouse and human genomes.pdf:pdf},
- journal = {Proceedings of the National Academy of Sciences of the United States of America},
- number = {20},
- pages = {11484--11489},
- title = {{Evolution's cauldron: Duplication, deletion, and rearrangement in the mouse and human genomes}},
- volume = {100},
- year = {2003}
- }
- @article{Shao2012,
-   author    = {Shao, Zhen and Zhang, Yijing and Yuan, Guo-Cheng and Orkin, Stuart H and Waxman, David J},
-   title     = {{MAnorm: a robust model for quantitative comparison of ChIP-Seq data sets}},
-   journal   = {Genome Biology},
-   volume    = {13},
-   number    = {3},
-   pages     = {R16},
-   year      = {2012},
-   publisher = {BioMed Central Ltd},
-   doi       = {10.1186/gb-2012-13-3-r16},
-   issn      = {1465-6906},
-   url       = {http://genomebiology.com/2012/13/3/R16},
-   file      = {:Users/ryan/Documents/Mendeley Desktop/Shao et al. - 2012 - MAnorm a robust model for quantitative comparison of ChIP-Seq data sets.pdf:pdf}
- }
- @article{Castellana2008,
- abstract = {Gene annotation underpins genome science. Most often protein coding sequence is inferred from the genome based on transcript evidence and computational predictions. While generally correct, gene models suffer from errors in reading frame, exon border definition, and exon identification. To ascertain the error rate of Arabidopsis thaliana gene models, we isolated proteins from a sample of Arabidopsis tissues and determined the amino acid sequences of 144,079 distinct peptides by tandem mass spectrometry. The peptides corresponded to 1 or more of 3 different translations of the genome: a 6-frame translation, an exon splice-graph, and the currently annotated proteome. The majority of the peptides (126,055) resided in existing gene models (12,769 confirmed proteins), comprising 40{\%} of annotated genes. Surprisingly, 18,024 novel peptides were found that do not correspond to annotated genes. Using the gene finding program AUGUSTUS and 5,426 novel peptides that occurred in clusters, we discovered 778 new protein-coding genes and refined the annotation of an additional 695 gene models. The remaining 13,449 novel peptides provide high quality annotation ({\textgreater}99{\%} correct) for thousands of additional genes. Our observation that 18,024 of 144,079 peptides did not match current gene models suggests that 13{\%} of the Arabidopsis proteome was incomplete due to approximately equal numbers of missing and incorrect gene models.},
- author = {Castellana, Natalie E and Payne, Samuel H and Shen, Zhouxin and Stanke, Mario and Bafna, Vineet and Briggs, Steven P},
- doi = {10.1073/pnas.0811066106},
- file = {:Users/ryan/Documents/Mendeley Desktop/Castellana et al. - 2008 - Discovery and revision of Arabidopsis genes by proteogenomics.pdf:pdf},
- issn = {1091-6490},
- journal = {Proceedings of the National Academy of Sciences of the United States of America},
- keywords = {Arabidopsis,Arabidopsis Proteins,Arabidopsis Proteins: genetics,Arabidopsis: genetics,Genome, Plant,Genome, Plant: genetics,Models, Genetic,Proteome,Proteome: genetics,Proteomics,Proteomics: methods,Software},
- month = dec,
- number = {52},
- pages = {21034--21038},
- pmid = {19098097},
- title = {{Discovery and revision of Arabidopsis genes by proteogenomics}},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=2605632{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {105},
- year = {2008}
- }
- @article{Adcock1997,
- author = {Adcock, C. J.},
- doi = {10.1111/1467-9884.00082},
- file = {:Users/ryan/Documents/Mendeley Desktop/Adcock - 1997 - Sample size determination a review.pdf:pdf},
- issn = {0039-0526},
- journal = {Journal of the Royal Statistical Society: Series D (The Statistician)},
- keywords = {average coverage criterion,average length criterion,bayes factors,bayesian methods,binomial,coherence,distribution,hypothesis testing,maximum expected utility,mcnemar,multinomial distribution,multivariate analysis,normal distribution,pivots,regression,s test,sample size determination,tolerance intervals,worst},
- month = jul,
- number = {2},
- pages = {261--283},
- title = {{Sample size determination: a review}},
- url = {http://doi.wiley.com/10.1111/1467-9884.00082},
- volume = {46},
- year = {1997}
- }
- @article{Kim2013,
- author = {Kim, Daehwan and Pertea, Geo and Trapnell, Cole and Pimentel, Harold and Kelley, Ryan and Salzberg, Steven L},
- doi = {10.1186/gb-2013-14-4-r36},
- journal = {Genome Biology},
- number = {4},
- pages = {R36},
- title = {{TopHat2: accurate alignment of transcriptomes in the presence of insertions, deletions and gene fusions}},
- url = {http://www.biomedcentral.com/content/pdf/gb-2013-14-4-r36.pdf},
- volume = {14},
- year = {2013}
- }
- @article{Joe2005a,
- author = {Joe, Harry and Zhu, Rong},
- doi = {10.1002/bimj.200410102},
- file = {:Users/ryan/Documents/Mendeley Desktop/Joe, Zhu - 2005 - Generalized Poisson Distribution the Property of Mixture of Poisson and Comparison with Negative Binomial Distribution.pdf:pdf},
- issn = {0323-3847},
- journal = {Biometrical Journal},
- keywords = {overdispersion,poisson mixture,skewness,zero-inflated distribution},
- month = apr,
- number = {2},
- pages = {219--229},
- title = {{Generalized Poisson Distribution: the Property of Mixture of Poisson and Comparison with Negative Binomial Distribution}},
- url = {http://doi.wiley.com/10.1002/bimj.200410102},
- volume = {47},
- year = {2005}
- }
- @article{Kvam2012,
- abstract = {RNA-Seq technologies are quickly revolutionizing genomic studies, and statistical methods for RNA-seq data are under continuous development. Timely review and comparison of the most recently proposed statistical methods will provide a useful guide for choosing among them for data analysis. Particular interest surrounds the ability to detect differential expression (DE) in genes. Here we compare four recently proposed statistical methods, edgeR, DESeq, baySeq, and a method with a two-stage Poisson model (TSPM), through a variety of simulations that were based on different distribution models or real data. We compared the ability of these methods to detect DE genes in terms of the significance ranking of genes and false discovery rate control. All methods compared are implemented in freely available software. We also discuss the availability and functions of the currently available versions of these software.},
- author = {Kvam, Vanessa M and Liu, Peng and Si, Yaqing},
- doi = {10.3732/ajb.1100340},
- file = {:Users/ryan/Documents/Mendeley Desktop/Kvam, Liu, Si - 2012 - A comparison of statistical methods for detecting differentially expressed genes from RNA-seq data.pdf:pdf},
- issn = {1537-2197},
- journal = {American Journal of Botany},
- month = feb,
- number = {2},
- pages = {248--256},
- pmid = {22268221},
- title = {{A comparison of statistical methods for detecting differentially expressed genes from RNA-seq data}},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/22268221},
- volume = {99},
- year = {2012}
- }
- @article{Tuddenham2012,
- abstract = {Roseolovirus, or human herpesvirus 6 (HHV-6), is a ubiquitous human pathogen infecting over 95{\%} of the population by the age of 2 years. As with other herpesviruses, reactivation of HHV-6 can present with severe complications in immunocompromised individuals. Recent studies have highlighted the importance of herpesvirus-derived microRNAs (miRNAs) in modulating both cellular and viral gene expression. An initial report which computed the likelihood of various viruses to encode miRNAs did not predict HHV-6 miRNAs. To experimentally screen for small HHV-6-encoded RNAs, we conducted large-scale sequencing of Sup-T-1 cells lytically infected with a laboratory strain of HHV-6B. This revealed an abundant, 60- to 65-nucleotide RNA of unknown function derived from the lytic origin of replication (OriLyt) that gave rise to smaller RNA species of 18 or 19 nucleotides. In addition, we identified four pre-miRNAs whose mature forms accumulated in Argonaute 2. In contrast to the case for other betaherpesviruses, HHV-6B miRNAs are expressed from direct repeat regions (DR(L) and DR(R)) located at either side of the genome. All miRNAs are conserved in the closely related HHV-6A variant, and one of them is a seed ortholog of the human miRNA miR-582-5p. Similar to alphaherpesvirus miRNAs, they are expressed in antisense orientation relative to immediate-early open reading frames (ORFs) and thus have the potential to regulate key viral genes.},
- author = {Tuddenham, Lee and Jung, Jette S and Chane-Woon-Ming, B{\'{e}}atrice and D{\"{o}}lken, Lars and Pfeffer, S{\'{e}}bastien},
- doi = {10.1128/JVI.05911-11},
- file = {:Users/ryan/Documents/Mendeley Desktop/Tuddenham et al. - 2012 - Small RNA deep sequencing identifies microRNAs and other small noncoding RNAs from human herpesvirus 6B.pdf:pdf},
- issn = {1098-5514},
- journal = {Journal of Virology},
- keywords = {Base Sequence,Cell Line,DNA Primers,Fluorescent Antibody Technique,Genes, Viral,Herpesvirus 6, Human,Herpesvirus 6, Human: genetics,Humans,Likelihood Functions,MicroRNAs,MicroRNAs: genetics,RNA, Untranslated,RNA, Untranslated: genetics,Replication Origin,Reverse Transcriptase Polymerase Chain Reaction},
- month = feb,
- number = {3},
- pages = {1638--1649},
- pmid = {22114334},
- title = {{Small RNA deep sequencing identifies microRNAs and other small noncoding RNAs from human herpesvirus 6B}},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/22114334},
- volume = {86},
- year = {2012}
- }
- @article{Gerard2017,
- abstract = {In this paper we combine two important ideas in the analysis of large-scale genomics experiments (e.g. experiments that aim to identify genes that are differentially expressed between two conditions). The first idea is use of Empirical Bayes (EB) methods to handle the large number of potentially-sparse effects, and estimate false discovery rates and related quantities. The second is use of factor analysis methods to deal with sources of unwanted variation such as batch effects and unmeasured confounders. We describe a simple modular fitting procedure that combines the key ideas from both these lines of research, and thus produce new and powerful EB methods for analyzing genomics experiments that can account for both large numbers of potentially-sparse effects and for sources of unwanted variation that can otherwise confound inference. In realistic simulations, these new methods provide significant gains in power and calibration over competing methods. In real data analysis we highlight that different methods, while often conceptually similar, can vary widely in their assessments of statistical significance, highlighting the need for care in both choice of methods and interpretation of results.},
- archivePrefix = {arXiv},
- arxivId = {1709.10066},
- author = {Gerard, David and Stephens, Matthew},
- eprint = {1709.10066},
- file = {:Users/ryan/Documents/Mendeley Desktop/Gerard, Stephens - 2017 - Empirical Bayes Shrinkage and False Discovery Rate Estimation, Allowing For Unwanted Variation.pdf:pdf},
- keywords = {batch effects,rna-seq,surrogate variable,unobserved confounding,unwanted variation},
- title = {{Empirical Bayes Shrinkage and False Discovery Rate Estimation, Allowing For Unwanted Variation}},
- url = {http://arxiv.org/abs/1709.10066},
- year = {2017}
- }
- @article{Langaas2005,
- abstract = {We consider the problem of estimating the proportion of true null hypotheses, $\pi_0$, in a multiple-hypothesis set-up. The tests are based on observed p-values. We first review published estimators based on the estimator that was suggested by Schweder and Spj{\o}tvoll. Then we derive new estimators based on nonparametric maximum likelihood estimation of the p-value density, restricting to decreasing and convex decreasing densities. The estimators of $\pi_0$ are all derived under the assumption of independent test statistics. Their performance under dependence is investigated in a simulation study. We find that the estimators are relatively robust with respect to the assumption of independence and work well also for test statistics with moderate dependence.},
- author = {Langaas, Mette and Lindqvist, Bo Henry and Ferkingstad, Egil},
- doi = {10.1111/j.1467-9868.2005.00515.x},
- file = {:Users/ryan/Documents/Mendeley Desktop/Langaas, Lindqvist, Ferkingstad - 2005 - Estimating the proportion of true null hypotheses, with application to DNA microarray data.pdf:pdf},
- issn = {1369-7412},
- journal = {Journal of the Royal Statistical Society. Series B: Statistical Methodology},
- keywords = {Bioinformatics,Decreasing and convex density,Dependent test statistics,Multiple testing,Nonparametric maximum likelihood estimator,p-value},
- number = {4},
- pages = {555--572},
- title = {{Estimating the proportion of true null hypotheses, with application to DNA microarray data}},
- url = {https://doi.org/10.1111/j.1467-9868.2005.00515.x},
- volume = {67},
- year = {2005}
- }
- @article{Srivastava2016,
- author = {Srivastava, Avi and Sarkar, Hirak and Gupta, Nitish and Patro, Rob},
- doi = {10.1093/bioinformatics/btw277},
- file = {:Users/ryan/Documents/Mendeley Desktop/Srivastava et al. - 2016 - RapMap a rapid, sensitive and accurate tool for mapping RNA-seq reads to transcriptomes.pdf:pdf},
- issn = {1367-4803},
- journal = {Bioinformatics},
- month = {oct},
- number = {12},
- pages = {i192--i200},
- pmid = {27307617},
- title = {{RapMap: a rapid, sensitive and accurate tool for mapping RNA-seq reads to transcriptomes}},
- url = {http://biorxiv.org/lookup/doi/10.1101/029652},
- volume = {32},
- year = {2016}
- }
- @article{Robinson1991a,
- author = {Robinson, G. K.},
- file = {:Users/ryan/Documents/Mendeley Desktop/Robinson - 1991 - That BLUP is a Good Thing The Estimation of Random Effects.pdf:pdf},
- journal = {Statistical Science},
- number = {1},
- pages = {15--32},
- title = {{That BLUP is a Good Thing: The Estimation of Random Effects}},
- url = {http://www.jstor.org/stable/2245695},
- volume = {6},
- year = {1991}
- }
- @article{Boley2014a,
- abstract = {The identification of full length transcripts entirely from short-read RNA sequencing data (RNA-seq) remains a challenge in the annotation of genomes. Here we describe an automated pipeline for genome annotation that integrates RNA-seq and gene-boundary data sets, which we call Generalized RNA Integration Tool, or GRIT. Applying GRIT to Drosophila melanogaster short-read RNA-seq, cap analysis of gene expression (CAGE) and poly(A)-site-seq data collected for the modENCODE project, we recovered the vast majority of previously annotated transcripts and doubled the total number of transcripts cataloged. We found that 20{\%} of protein coding genes encode multiple protein-localization signals and that, in 20-d-old adult fly heads, genes with multiple polyadenylation sites are more common than genes with alternative splicing or alternative promoters. GRIT demonstrates 30{\%} higher precision and recall than the most widely used transcript assembly tools. GRIT will facilitate the automated generation of high-quality genome annotations without the need for extensive manual annotation.},
- author = {Boley, Nathan and Stoiber, Marcus H and Booth, Benjamin W and Wan, Kenneth H and Hoskins, Roger A and Bickel, Peter J and Celniker, Susan E and Brown, James B},
- doi = {10.1038/nbt.2850},
- file = {:Users/ryan/Documents/Mendeley Desktop/Boley et al. - 2014 - Genome-guided transcript assembly by integrative analysis of RNA sequence data.pdf:pdf},
- issn = {1546-1696},
- journal = {Nature Biotechnology},
- month = {mar},
- pmid = {24633242},
- publisher = {Nature Publishing Group},
- title = {{Genome-guided transcript assembly by integrative analysis of RNA sequence data}},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/24633242},
- year = {2014}
- }
- @article{Zhou2009a,
- abstract = {Advances in sequencing technologies have accelerated the sequencing of new genomes, far outpacing the generation of gene and protein resources needed to annotate them. Direct comparison and alignment of existing cDNA sequences from a related species is an effective and readily available means to determine genes in the new genomes. Current spliced alignment programs are inadequate for comparing sequences between different species, owing to their low sensitivity and splice junction accuracy. A new spliced alignment tool, sim4cc, overcomes problems in the earlier tools by incorporating three new features: universal spaced seeds, to increase sensitivity and allow comparisons between species at various evolutionary distances, and powerful splice signal models and evolutionarily-aware alignment techniques, to improve the accuracy of gene models. When tested on vertebrate comparisons at diverse evolutionary distances, sim4cc had significantly higher sensitivity compared to existing alignment programs, more than 10{\%} higher than the closest competitor for some comparisons, while being comparable in speed to its predecessor, sim4. Sim4cc can be used in one-to-one or one-to-many comparisons of genomic and cDNA sequences, and can also be effectively incorporated into a high-throughput annotation engine, as demonstrated by the mapping of 64,000 Fagus grandifolia 454 ESTs and unigenes to the poplar genome.},
- author = {Zhou, Leming and Pertea, Mihaela and Delcher, Arthur L and Florea, Liliana},
- doi = {10.1093/nar/gkp319},
- file = {:Users/ryan/Documents/Mendeley Desktop/Zhou et al. - 2009 - Sim4cc a cross-species spliced alignment program.pdf:pdf},
- issn = {1362-4962},
- journal = {Nucleic Acids Research},
- keywords = {Algorithms,Animals,Dogs,Genome, Plant,Genomics,Genomics: methods,Humans,Mice,RNA Splicing,Reference Standards,Sequence Alignment,Sequence Alignment: methods,Sequence Alignment: standards,Software,Vertebrates,Vertebrates: genetics},
- month = {jun},
- number = {11},
- pages = {e80},
- pmid = {19429899},
- title = {{Sim4cc: a cross-species spliced alignment program}},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=2699533{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {37},
- year = {2009}
- }
- @article{Cordero2012,
- abstract = {BACKGROUND: Massive Parallel Sequencing methods (MPS) can extend and improve the knowledge obtained by conventional microarray technology, both for mRNAs and short non-coding RNAs, e.g. miRNAs. The processing methods used to extract and interpret the information are an important aspect of dealing with the vast amounts of data generated from short read sequencing. Although the number of computational tools for MPS data analysis is constantly growing, their strengths and weaknesses as part of a complex analytical pipe-line have not yet been well investigated. PRIMARY FINDINGS: A benchmark MPS miRNA dataset, resembling a situation in which miRNAs are spiked in biological replication experiments was assembled by merging a publicly available MPS spike-in miRNAs data set with MPS data derived from healthy donor peripheral blood mononuclear cells. Using this data set we observed that short reads counts estimation is strongly under estimated in case of duplicates miRNAs, if whole genome is used as reference. Furthermore, the sensitivity of miRNAs detection is strongly dependent by the primary tool used in the analysis. Within the six aligners tested, specifically devoted to miRNA detection, SHRiMP and MicroRazerS show the highest sensitivity. Differential expression estimation is quite efficient. Within the five tools investigated, two of them (DESseq, baySeq) show a very good specificity and sensitivity in the detection of differential expression. CONCLUSIONS: The results provided by our analysis allow the definition of a clear and simple analytical optimized workflow for miRNAs digital quantitative analysis.},
- author = {Cordero, Francesca and Beccuti, Marco and Arigoni, Maddalena and Donatelli, Susanna and Calogero, Raffaele A},
- doi = {10.1371/journal.pone.0031630},
- file = {:Users/ryan/Documents/Mendeley Desktop/Cordero et al. - 2012 - Optimizing a massive parallel sequencing workflow for quantitative miRNA expression analysis.pdf:pdf},
- issn = {1932-6203},
- journal = {PLoS ONE},
- keywords = {Algorithms,Databases, Genetic,Gene Expression Profiling,Gene Expression Regulation,Genome, Human,Genome, Human: genetics,High-Throughput Nucleotide Sequencing,High-Throughput Nucleotide Sequencing: methods,Humans,MicroRNAs,MicroRNAs: genetics,MicroRNAs: metabolism,ROC Curve,Reference Standards,Sample Size,Sequence Alignment,Software,Workflow},
- month = {jan},
- number = {2},
- pages = {e31630},
- pmid = {22363693},
- title = {{Optimizing a massive parallel sequencing workflow for quantitative miRNA expression analysis}},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=3282730{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {7},
- year = {2012}
- }
- @article{Chen2012,
- abstract = {Personalized medicine is expected to benefit from combining genomic information with regular monitoring of physiological states by multiple high-throughput methods. Here, we present an integrative personal omics profile (iPOP), an analysis that combines genomic, transcriptomic, proteomic, metabolomic, and autoantibody profiles from a single individual over a 14 month period. Our iPOP analysis revealed various medical risks, including type 2 diabetes. It also uncovered extensive, dynamic changes in diverse molecular components and biological pathways across healthy and diseased conditions. Extremely high-coverage genomic and transcriptomic data, which provide the basis of our iPOP, revealed extensive heteroallelic changes during healthy and diseased states and an unexpected RNA editing mechanism. This study demonstrates that longitudinal iPOP can be used to interpret healthy and diseased states by connecting genomic information with additional dynamic omics activity. {\textcopyright} 2012 Elsevier Inc.},
- author = {Chen, Rui and Mias, George I. and Li-Pook-Than, Jennifer and Jiang, Lihua and Lam, Hugo Y. K. and Chen, Rong and Miriami, Elana and Karczewski, Konrad J. and Hariharan, Manoj and Dewey, Frederick E. and Cheng, Yong and Clark, Michael J. and Im, Hogune and Habegger, Lukas and Balasubramanian, Suganthi and O'Huallachain, Maeve and Dudley, Joel T. and Hillenmeyer, Sara and Haraksingh, Rajini and Sharon, Donald and Euskirchen, Ghia and Lacroute, Phil and Bettinger, Keith and Boyle, Alan P. and Kasowski, Maya and Grubert, Fabian and Seki, Scott and Garcia, Marco and Whirl-Carrillo, Michelle and Gallardo, Mercedes and Blasco, Maria A. and Greenberg, Peter L. and Snyder, Phyllis and Klein, Teri E. and Altman, Russ B. and Butte, Atul J. and Ashley, Euan A. and Gerstein, Mark and Nadeau, Kari C. and Tang, Hua and Snyder, Michael},
- doi = {10.1016/j.cell.2012.02.009},
- file = {:Users/ryan/Documents/Mendeley Desktop/Chen et al. - 2012 - Personal Omics Profiling Reveals Dynamic Molecular and Medical Phenotypes(2).pdf:pdf},
- issn = {0092-8674},
- journal = {Cell},
- month = {mar},
- number = {6},
- pages = {1293--1307},
- publisher = {Elsevier Inc.},
- title = {{Personal Omics Profiling Reveals Dynamic Molecular and Medical Phenotypes}},
- url = {http://dx.doi.org/10.1016/j.cell.2012.02.009},
- volume = {148},
- year = {2012}
- }
- @article{Bolstad2003,
- abstract = {MOTIVATION: When running experiments that involve multiple high density oligonucleotide arrays, it is important to remove sources of variation between arrays of non-biological origin. Normalization is a process for reducing this variation. It is common to see non-linear relations between arrays and the standard normalization provided by Affymetrix does not perform well in these situations.
- RESULTS: We present three methods of performing normalization at the probe intensity level. These methods are called complete data methods because they make use of data from all arrays in an experiment to form the normalizing relation. These algorithms are compared to two methods that make use of a baseline array: a one number scaling based algorithm and a method that uses a non-linear normalizing relation by comparing the variability and bias of an expression measure. Two publicly available datasets are used to carry out the comparisons. The simplest and quickest complete data method is found to perform favorably.
- AVAILABILITY: Software implementing all three of the complete data normalization methods is available as part of the R package Affy, which is a part of the Bioconductor project http://www.bioconductor.org.
- SUPPLEMENTARY INFORMATION: Additional figures may be found at http://www.stat.berkeley.edu/{\~{}}bolstad/normalize/index.html},
- author = {Bolstad, B M and Irizarry, R A and Astrand, M and Speed, T P},
- file = {:Users/ryan/Documents/Mendeley Desktop/Bolstad et al. - 2003 - A comparison of normalization methods for high density oligonucleotide array data based on variance and bias.pdf:pdf},
- issn = {1367-4803},
- journal = {Bioinformatics},
- keywords = {Algorithms,Calibration,Models, Genetic,Molecular Probes,Nonlinear Dynamics,Oligonucleotide Array Sequence Analysis,Oligonucleotide Array Sequence Analysis: instrumentation,Oligonucleotide Array Sequence Analysis: methods,Oligonucleotide Array Sequence Analysis: standards,Quality Control,Sequence Analysis, DNA,Sequence Analysis, DNA: methods,Sequence Analysis, DNA: standards,Stochastic Processes},
- month = {jan},
- number = {2},
- pages = {185--193},
- pmid = {12538238},
- title = {{A comparison of normalization methods for high density oligonucleotide array data based on variance and bias}},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/12538238},
- volume = {19},
- year = {2003}
- }
- @techreport{Maciuca2016,
- author = {Maciuca, Sorina and {del Ojo Elias}, Carlos and McVean, Gil and Iqbal, Zamin},
- doi = {10.1101/059170},
- file = {:Users/ryan/Documents/Mendeley Desktop/Maciuca et al. - 2016 - A natural encoding of genetic variation in a Burrows-Wheeler Transform to enable mapping and genome inference.pdf:pdf},
- institution = {bioRxiv},
- keywords = {burrows-wheeler transform,fm index,genome,pan-genome},
- month = {jun},
- pages = {1--10},
- title = {{A natural encoding of genetic variation in a Burrows-Wheeler Transform to enable mapping and genome inference}},
- url = {http://biorxiv.org/lookup/doi/10.1101/059170},
- year = {2016}
- }
- @article{Langenberger2009,
- abstract = {MicroRNA-offset-RNAs (moRNAs) were recently detected as highly abundant class of small RNAs in a basal chordate. Using short read sequencing data, we show here that moRNAs are also produced from human microRNA precursors, albeit at quite low expression levels. The expression levels of moRNAs are unrelated to those of the associated microRNAs. Surprisingly, microRNA precursors that also show moRNAs are typically evolutionarily old, comprising more than half of the microRNA families that were present in early Bilateria, while evidence for moRNAs was found only for a relative small fraction of microRNA families of recent origin.},
- author = {Langenberger, David and Bermudez-Santana, Clara and Hertel, Jana and Hoffmann, Steve and Khaitovich, Philipp and Stadler, Peter F},
- doi = {10.1093/bioinformatics/btp419},
- file = {:Users/ryan/Documents/Mendeley Desktop/Langenberger et al. - 2009 - Evidence for human microRNA-offset RNAs in small RNA sequencing data(2).pdf:pdf;:Users/ryan/Documents/Mendeley Desktop/Langenberger et al. - 2009 - Evidence for human microRNA-offset RNAs in small RNA sequencing data.pdf:pdf},
- issn = {1367-4811},
- journal = {Bioinformatics},
- keywords = {Humans,MicroRNAs,MicroRNAs: chemistry,RNA,RNA: chemistry,Sequence Analysis,Small Interfering,Small Interfering: chemistry},
- month = {sep},
- number = {18},
- pages = {2298--2301},
- pmid = {19584066},
- title = {{Evidence for human microRNA-offset RNAs in small RNA sequencing data}},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/19584066},
- volume = {25},
- year = {2009}
- }
- @article{Pedersen2012a,
- abstract = {SUMMARY: comb-p is a command-line tool and a python library that manipulates BED files of possibly irregularly spaced P-values and (1) calculates auto-correlation, (2) combines adjacent P-values, (3) performs false discovery adjustment, (4) finds regions of enrichment (i.e. series of adjacent low P-values) and (5) assigns significance to those regions. In addition, tools are provided for visualization and assessment. We provide validation and example uses on bisulfite-seq with P-values from Fisher's exact test, tiled methylation probes using a linear model and Dam-ID for chromatin binding using moderated t-statistics. Because the library accepts input in a simple, standardized format and is unaffected by the origin of the P-values, it can be used for a wide variety of applications. AVAILABILITY: comb-p is maintained under the BSD license. The documentation and implementation are available at https://github.com/brentp/combined-pvalues. CONTACT: bpederse@gmail.com},
- author = {Pedersen, Brent S. and Schwartz, David A. and Yang, Ivana V. and Kechris, Katerina J.},
- doi = {10.1093/bioinformatics/bts545},
- file = {:Users/ryan/Documents/Mendeley Desktop/Pedersen et al. - 2012 - Comb-p Software for combining, analyzing, grouping and correcting spatially correlated P-values.pdf:pdf},
- issn = {1367-4803},
- journal = {Bioinformatics},
- number = {22},
- pages = {2986--2988},
- pmid = {22954632},
- title = {{Comb-p: Software for combining, analyzing, grouping and correcting spatially correlated P-values}},
- url = {http://bioinformatics.oxfordjournals.org/content/28/22/2986.full.pdf},
- volume = {28},
- year = {2012}
- }
- @article{Beleites2013,
- abstract = {In biospectroscopy, suitably annotated and statistically independent samples (e.g. patients, batches, etc.) for classifier training and testing are scarce and costly. Learning curves show the model performance as function of the training sample size and can help to determine the sample size needed to train good classifiers. However, building a good model is actually not enough: the performance must also be proven. We discuss learning curves for typical small sample size situations with 5-25 independent samples per class. Although the classification models achieve acceptable performance, the learning curve can be completely masked by the random testing uncertainty due to the equally limited test sample size. In consequence, we determine test sample sizes necessary to achieve reasonable precision in the validation and find that 75-100 samples will usually be needed to test a good but not perfect classifier. Such a data set will then allow refined sample size planning on the basis of the achieved performance. We also demonstrate how to calculate necessary sample sizes in order to show the superiority of one classifier over another: this often requires hundreds of statistically independent test samples or is even theoretically impossible. We demonstrate our findings with a data set of ca. 2550 Raman spectra of single cells (five classes: erythrocytes, leukocytes and three tumour cell lines BT-20, MCF-7 and OCI-AML3) as well as by an extensive simulation that allows precise determination of the actual performance of the models in question.},
- author = {Beleites, Claudia and Neugebauer, Ute and Bocklitz, Thomas and Krafft, Christoph and Popp, J{\"{u}}rgen},
- doi = {10.1016/j.aca.2012.11.007},
- file = {:Users/ryan/Documents/Mendeley Desktop/Beleites et al. - 2013 - Sample size planning for classification models.pdf:pdf},
- issn = {1873-4324},
- journal = {Analytica Chimica Acta},
- keywords = {Cells, Cultured,Erythrocytes,Erythrocytes: chemistry,Erythrocytes: classification,Erythrocytes: cytology,Humans,Leukocytes,Leukocytes: chemistry,Leukocytes: classification,Leukocytes: cytology,MCF-7 Cells,Models, Theoretical,Sample Size,Spectrum Analysis, Raman},
- month = {jan},
- pages = {25--33},
- pmid = {23265730},
- publisher = {Elsevier B.V.},
- title = {{Sample size planning for classification models}},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/23265730},
- volume = {760},
- year = {2013}
- }
- @article{Standage2012,
- abstract = {Accurate gene structure annotation is a fundamental but somewhat elusive goal of genome projects, as witnessed by the fact that (model) genomes typically undergo several cycles of re-annotation. In many cases, it is not only different versions of annotations that need to be compared but also different sources of annotation of the same genome, derived from distinct gene prediction workflows. Such comparisons are of interest to annotation providers, prediction software developers, and end-users, who all need to assess what is common and what is different among distinct annotation sources. We developed ParsEval, a software application for pairwise comparison of sets of gene structure annotations. ParsEval calculates several statistics that highlight the similarities and differences between the two sets of annotations provided. These statistics are presented in an aggregate summary report, with additional details provided as individual reports specific to non-overlapping, gene-model-centric genomic loci. Genome browser styled graphics embedded in these reports help visualize the genomic context of the annotations. Output from ParsEval is both easily read and parsed, enabling systematic identification of problematic gene models for subsequent focused analysis.},
- author = {Standage, Daniel S and Brendel, Volker P},
- doi = {10.1186/1471-2105-13-187},
- file = {:Users/ryan/Documents/Mendeley Desktop/Standage, Brendel - 2012 - ParsEval parallel comparison and analysis of gene structure annotations.pdf:pdf},
- issn = {1471-2105},
- journal = {BMC Bioinformatics},
- month = {jan},
- pages = {187},
- pmid = {22852583},
- title = {{ParsEval: parallel comparison and analysis of gene structure annotations}},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=3439248{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {13},
- year = {2012}
- }
- @article{Hawkins2013,
- abstract = {Naive CD4+ T cells can differentiate into specific helper and regulatory T cell lineages in order to combat infection and disease. The correct response to cytokines and a controlled balance of these populations is critical for the immune system and the avoidance of autoimmune disorders. To investigate how early cell-fate commitment is regulated, we generated the first human genome-wide maps of histone modifications that reveal enhancer elements after 72 hr of in vitro polarization toward T helper 1 (Th1) and T helper 2 (Th2) cell lineages. Our analysis indicated that even at this very early time point, cell-specific gene regulation and enhancers were at work directing lineage commitment. Further examination of lineage-specific enhancers identified transcription factors (TFs) with known and unknown T cell roles as putative drivers of lineage-specific gene expression. Lastly, an integrative analysis of immunopathogenic-associated SNPs suggests a role for distal regulatory elements in disease etiology.},
- author = {Hawkins, R David and Larjo, Antti and Tripathi, Subhash K and Wagner, Ulrich and Luu, Ying and L{\"{o}}nnberg, Tapio and Raghav, Sunil K and Lee, Leonard K and Lund, Riikka and Ren, Bing and L{\"{a}}hdesm{\"{a}}ki, Harri and Lahesmaa, Riitta},
- doi = {10.1016/j.immuni.2013.05.011},
- file = {:Users/ryan/Documents/Mendeley Desktop/Hawkins et al. - 2013 - Global chromatin state analysis reveals lineage-specific enhancers during the initiation of human T helper 1 and.pdf:pdf},
- issn = {1097-4180},
- journal = {Immunity},
- keywords = {Cell Differentiation,Cell Differentiation: genetics,Cell Lineage,Cell Lineage: genetics,Chromatin,Chromatin: metabolism,Genetic Predisposition to Disease,Genome-Wide Association Study,Histones,Histones: genetics,Histones: metabolism,Humans,Immune System Diseases,Immune System Diseases: genetics,Immune System Diseases: immunology,Polymorphism, Single Nucleotide,Promoter Regions, Genetic,Th1 Cells,Th1 Cells: immunology,Th1-Th2 Balance,Th2 Cells,Th2 Cells: immunology},
- month = {jun},
- number = {6},
- pages = {1271--1284},
- pmid = {23791644},
- title = {{Global chromatin state analysis reveals lineage-specific enhancers during the initiation of human T helper 1 and T helper 2 cell polarization}},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/23791644},
- volume = {38},
- year = {2013}
- }
- @article{Frazee2014,
- abstract = {RNA-sequencing (RNA-seq) is a flexible technology for measuring genome-wide expression that is rapidly replacing microarrays as costs become comparable. Current differential expression analysis methods for RNA-seq data fall into two broad classes: (1) methods that quantify expression within the boundaries of genes previously published in databases and (2) methods that attempt to reconstruct full length RNA transcripts. The first class cannot discover differential expression outside of previously known genes. While the second approach does possess discovery capabilities, statistical analysis of differential expression is complicated by the ambiguity and variability incurred while assembling transcripts and estimating their abundances. Here, we propose a novel method that first identifies differentially expressed regions (DERs) of interest by assessing differential expression at each base of the genome. The method then segments the genome into regions comprised of bases showing similar differential expression signal, and then assigns a measure of statistical significance to each region. Optionally, DERs can be annotated using a reference database of genomic features. We compare our approach with leading competitors from both current classes of differential expression methods and highlight the strengths and weaknesses of each. A software implementation of our method is available on github (https://github.com/alyssafrazee/derfinder).},
- author = {Frazee, Alyssa C and Sabunciyan, Sarven and Hansen, Kasper D and Irizarry, Rafael A and Leek, Jeffrey T},
- doi = {10.1093/biostatistics/kxt053},
- file = {:Users/ryan/Documents/Mendeley Desktop/Frazee et al. - 2014 - Differential expression analysis of RNA-seq data at single-base resolution.pdf:pdf},
- issn = {1468-4357},
- journal = {Biostatistics},
- keywords = {bioinformatics,differential expression,false discovery rate,genomics,rna sequencing},
- month = {jan},
- pages = {1--14},
- pmid = {24398039},
- title = {{Differential expression analysis of RNA-seq data at single-base resolution}},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/24398039},
- year = {2014}
- }
- @article{Zeileis2008,
- author = {Zeileis, Achim and Kleiber, Christian and Jackman, Simon},
- file = {:Users/ryan/Documents/Mendeley Desktop/Zeileis, Kleiber, Jackman - 2008 - Regression Models for Count Data in R.pdf:pdf},
- journal = {Journal of Statistical Software},
- keywords = {glm,hurdle model,negative binomial model,poisson model,zero-inflated model},
- number = {8},
- title = {{Regression Models for Count Data in R}},
- volume = {27},
- year = {2008}
- }
- @article{Trapnell2012a,
- author = {Trapnell, Cole and Roberts, Adam and Goff, Loyal and Pertea, Geo and Kim, Daehwan and Kelley, David R and Pimentel, Harold and Salzberg, Steven L and Rinn, John L and Pachter, Lior},
- doi = {10.1038/nprot.2012.016},
- file = {:Users/ryan/Documents/Mendeley Desktop/Trapnell et al. - 2012 - Differential gene and transcript expression analysis of RNA-seq experiments with TopHat and Cufflinks.pdf:pdf},
- issn = {1754-2189},
- journal = {Nature Protocols},
- month = {mar},
- number = {3},
- pages = {562--578},
- publisher = {Nature Publishing Group},
- title = {{Differential gene and transcript expression analysis of RNA-seq experiments with TopHat and Cufflinks}},
- url = {http://www.nature.com/doifinder/10.1038/nprot.2012.016},
- volume = {7},
- year = {2012}
- }
- @techreport{Love2014,
- author = {Love, Michael I and Huber, Wolfgang and Anders, Simon},
- doi = {10.1101/002832},
- file = {:Users/ryan/Documents/Mendeley Desktop/Love, Huber, Anders - 2014 - Moderated estimation of fold change and dispersion for RNA-Seq data with DESeq2.pdf:pdf},
- institution = {bioRxiv},
- month = {feb},
- title = {{Moderated estimation of fold change and dispersion for RNA-Seq data with DESeq2}},
- url = {http://biorxiv.org/lookup/doi/10.1101/002832},
- year = {2014}
- }
- @article{London2000,
- abstract = {To examine the functional characteristics of memory CD4+ T cells, we used an adoptive transfer system to generate a stable population of Ag-specific memory cells in vivo and compared their responses to Ag with those of a similar population of Ag-specific naive cells. Memory cells localized to the spleen and lymph nodes of mice and exhibited extremely rapid recall responses to Ag in vivo, leaving the spleen within 3-5 days of Ag encounter. Unlike their naive counterparts, memory cells produced effector cytokines (IFN-gamma, IL-4, IL-5) within 12-24 h of Ag exposure and did not require multiple cycles of cell division to do so. Memory cells proliferated at lower Ag concentrations than did naive cells, were less dependent on costimulation by B7 molecules, and independent of costimulation by CD40. Furthermore, effector cytokine production by memory cells also occurred in the absence of either B7 or CD40 costimulation. Lastly, memory cells were resistant to tolerance induction. Together, these findings suggest that the threshold for activation of memory CD4+ cells is lower than that of naive cells. This would permit memory cells to rapidly express their effector functions in vivo earlier in the course of a secondary immune response, when the levels of Ag and the availability of costimulation may be relatively low.},
- author = {London, Cheryl A. and Lodge, Michael P. and Abbas, Abul K.},
- doi = {10.4049/jimmunol.164.1.265},
- file = {:Users/ryan/Documents/Mendeley Desktop/London, Lodge, Abbas - 2000 - Functional Responses and Costimulator Dependence of Memory CD4 T Cells.pdf:pdf},
- issn = {0022-1767},
- journal = {The Journal of Immunology},
- number = {1},
- pages = {265--272},
- title = {{Functional Responses and Costimulator Dependence of Memory CD4+ T Cells}},
- volume = {164},
- year = {2000}
- }
- @incollection{Rougemont2012,
- address = {Totowa, NJ},
- author = {Rougemont, Jacques and Naef, Felix},
- booktitle = {Gene Regulatory Networks},
- doi = {10.1007/978-1-61779-292-2},
- editor = {Deplancke, Bart and Gheldof, Nele},
- file = {:Users/ryan/Documents/Mendeley Desktop/Rougemont, Naef - 2012 - Gene Regulatory Networks.pdf:pdf},
- isbn = {978-1-61779-291-5},
- keywords = {bioinformatics,chromatin immunoprecipitation,factor binding,transcription regulation,transcriptional,ultra-high-throughput sequencing},
- publisher = {Humana Press},
- series = {Methods in Molecular Biology},
- title = {{Computational Analysis of Protein-DNA Interactions from ChIP-seq Data}},
- url = {http://www.springerlink.com/index/10.1007/978-1-61779-292-2},
- volume = {786},
- year = {2012}
- }
- @article{Allison2006,
- abstract = {In just a few years, microarrays have gone from obscurity to being almost ubiquitous in biological research. At the same time, the statistical methodology for microarray analysis has progressed from simple visual assessments of results to a weekly deluge of papers that describe purportedly novel algorithms for analysing changes in gene expression. Although the many procedures that are available might be bewildering to biologists who wish to apply them, statistical geneticists are recognizing commonalities among the different methods. Many are special cases of more general models, and points of consensus are emerging about the general approaches that warrant use and elaboration.},
- author = {Allison, David B and Cui, Xiangqin and Page, Grier P and Sabripour, Mahyar},
- doi = {10.1038/nrg1749},
- file = {:Users/ryan/Documents/Mendeley Desktop/Allison et al. - 2006 - Microarray data analysis from disarray to consolidation and consensus.pdf:pdf},
- issn = {1471-0056},
- journal = {Nature Reviews Genetics},
- keywords = {Algorithms,Cluster Analysis,Computational Biology,Computational Biology: methods,Computer Simulation,DNA, Complementary,DNA, Complementary: metabolism,Data Interpretation, Statistical,Gene Expression Profiling,Gene Expression Profiling: methods,Gene Expression Regulation,Genetic Techniques,Genetics,Humans,Microarray Analysis,Models, Biological,Models, Statistical,Oligonucleotide Array Sequence Analysis,Oligonucleotide Array Sequence Analysis: methods,RNA, Messenger,RNA, Messenger: metabolism},
- month = jan,
- number = {1},
- pages = {55--65},
- pmid = {16369572},
- title = {{Microarray data analysis: from disarray to consolidation and consensus}},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/16369572},
- volume = {7},
- year = {2006}
- }
- @article{Kostka2008,
- abstract = {Microarray gene expression signatures hold great promise to improve diagnosis and prognosis of disease. However, current documentation standards of such signatures do not allow for an unambiguous application to study-external patients. This hinders independent evaluation, effectively delaying the use of signatures in clinical practice. Data from eight publicly available clinical microarray studies were analyzed and the consistency of study-internal with study-external diagnoses was evaluated. Study-external classifications were based on documented information only. Documenting a signature is conceptually different from reporting a list of genes. We show that even the exact quantitative specification of a classification rule alone does not define a signature unambiguously. We found that discrepancy between study-internal and study-external diagnoses can be as frequent as 30{\%} (worst case) and 18{\%} (median). By using the proposed documentation by value strategy, which documents quantitative preprocessing information, the median discrepancy was reduced to 1{\%}. The process of evaluating microarray gene expression diagnostic signatures and bringing them to clinical practice can be substantially improved and made more reliable by better documentation of the signatures.},
- author = {Kostka, Dennis and Spang, Rainer},
- doi = {10.1371/journal.pcbi.0040022},
- file = {:Users/ryan/Documents/Mendeley Desktop/Kostka, Spang - 2008 - Microarray based diagnosis profits from better documentation of gene expression signatures.pdf:pdf},
- issn = {1553-7358},
- journal = {PLoS Computational Biology},
- keywords = {Algorithms,Diagnosis, Computer-Assisted,Diagnosis, Computer-Assisted: methods,Documentation,Documentation: methods,Gene Expression Profiling,Gene Expression Profiling: methods,Humans,Neoplasm Proteins,Neoplasm Proteins: analysis,Neoplasms,Neoplasms: diagnosis,Neoplasms: metabolism,Oligonucleotide Array Sequence Analysis,Oligonucleotide Array Sequence Analysis: methods,Reproducibility of Results,Sensitivity and Specificity,Tumor Markers, Biological,Tumor Markers, Biological: analysis},
- month = feb,
- number = {2},
- pages = {e22},
- pmid = {18282081},
- title = {{Microarray based diagnosis profits from better documentation of gene expression signatures}},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=2242819{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {4},
- year = {2008}
- }
- @article{Schmidl2009,
- abstract = {DNA methylation participates in establishing and maintaining chromatin structures and regulates gene transcription during mammalian development and cellular differentiation. With few exceptions, research thus far has focused on gene promoters, and little is known about the extent, functional relevance, and regulation of cell type-specific DNA methylation at promoter-distal sites. Here, we present a comprehensive analysis of differential DNA methylation in human conventional CD4+ T cells (Tconv) and CD4+CD25+ regulatory T cells (Treg), cell types whose differentiation and function are known to be controlled by epigenetic mechanisms. Using a novel approach that is based on the separation of a genome into methylated and unmethylated fractions, we examined the extent of lineage-specific DNA methylation across whole gene loci. More than 100 differentially methylated regions (DMRs) were identified that are present mainly in cell type-specific genes (e.g., FOXP3, IL2RA, CTLA4, CD40LG, and IFNG) and show differential patterns of histone H3 lysine 4 methylation. Interestingly, the majority of DMRs were located at promoter-distal sites, and many of these areas harbor DNA methylation-dependent enhancer activity in reporter gene assays. Thus, our study provides a comprehensive, locus-wide analysis of lineage-specific methylation patterns in Treg and Tconv cells, links cell type-specific DNA methylation with histone methylation and regulatory function, and identifies a number of cell type-specific, CpG methylation-sensitive enhancers in immunologically relevant genes.},
- author = {Schmidl, Christian and Klug, Maja and Boeld, Tina J and Andreesen, Reinhard and Hoffmann, Petra and Edinger, Matthias and Rehli, Michael},
- doi = {10.1101/gr.091470.109},
- file = {:Users/ryan/Documents/Mendeley Desktop/Schmidl et al. - 2009 - Lineage-specific DNA methylation in T cells correlates with histone methylation and enhancer activity.pdf:pdf},
- journal = {Genome Research},
- keywords = {epigenetics,methylation},
- mendeley-tags = {epigenetics,methylation},
- number = {7},
- pages = {1165--1174},
- title = {{Lineage-specific DNA methylation in T cells correlates with histone methylation and enhancer activity}},
- volume = {19},
- year = {2009}
- }
- @article{VanHouwelingen2014,
- abstract = {This paper reviews and discusses the role of Empirical Bayes methodology in medical statistics in the last 50 years. It gives some background on the origin of the empirical Bayes approach and its link with the famous Stein estimator. The paper describes the application in four important areas in medical statistics: disease mapping, health care monitoring, meta-analysis, and multiple testing. It ends with a warning that the application of the outcome of an empirical Bayes analysis to the individual ``subjects'' is a delicate matter that should be handled with prudence and care.},
- author = {van Houwelingen, Hans C.},
- doi = {10.1002/bimj.201400073},
- file = {:Users/ryan/Documents/Mendeley Desktop/van Houwelingen - 2014 - The role of empirical Bayes methodology as a leading principle in modern medical statistics.pdf:pdf},
- isbn = {3130220062},
- issn = {0323-3847},
- journal = {Biometrical Journal},
- keywords = {Disease mapping,Empirical Bayes,Health care monitoring,Meta-analysis,Multiple testing},
- month = nov,
- number = {6},
- pages = {919--932},
- pmid = {25205521},
- title = {{The role of empirical Bayes methodology as a leading principle in modern medical statistics}},
- url = {http://doi.wiley.com/10.1002/bimj.201400073},
- volume = {56},
- year = {2014}
- }
- @article{Schmeing2009,
- abstract = {The high-resolution structures of ribosomal subunits published in 2000 have revolutionized the field of protein translation. They facilitated the determination and interpretation of functional complexes of the ribosome by crystallography and electron microscopy. Knowledge of the precise positions of residues in the ribosome in various states has facilitated increasingly sophisticated biochemical and genetic experiments, as well as the use of new methods such as single-molecule kinetics. In this review, we discuss how the interaction between structural and functional studies over the last decade has led to a deeper understanding of the complex mechanisms underlying translation.},
- author = {Schmeing, T Martin and Ramakrishnan, V},
- doi = {10.1038/nature08403},
- file = {:Users/ryan/Documents/Mendeley Desktop/Schmeing, Ramakrishnan - 2009 - What recent ribosome structures have revealed about the mechanism of translation.pdf:pdf},
- issn = {1476-4687},
- journal = {Nature},
- keywords = {Bacterial Proteins,Bacterial Proteins: chemistry,Bacterial Proteins: metabolism,Biocatalysis,Protein Biosynthesis,Protein Biosynthesis: physiology,Ribosomal Proteins,Ribosomal Proteins: metabolism,Ribosomes,Ribosomes: chemistry,Ribosomes: metabolism,Structure-Activity Relationship},
- month = oct,
- number = {7268},
- pages = {1234--1242},
- pmid = {19838167},
- publisher = {Nature Publishing Group},
- title = {{What recent ribosome structures have revealed about the mechanism of translation}},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/19838167},
- volume = {461},
- year = {2009}
- }
- @article{Israeli2007,
- abstract = {The survival of a transplanted organ is dependent on maintenance of continuous immunosuppression. However, even the strictest adherence to the recommended drug levels does not prevent the occurrence of numerous complications associated with immunosuppression. The efficacy of immunosuppression therapy protocols would be enhanced greatly by the availability of biotechnologies capable of identifying and predicting immunological events prior to the manifestation of clinical parameters indicating graft failure. The aim of the study was to evaluate the potential contribution of some modern tools for post-transplantation monitoring, and to propose a method for combining them into a comprehensive mechanism for this purpose. The technologies utilized in this study are among a group of `cutting edge' diagnostic methods at the initial steps of evaluation for their potential contribution for post-transplantation immune monitoring. This study was a pioneering opportunity to combine and utilize these tools jointly. The method of research was based on monitoring 13 adult kidney transplant recipients. The Immuknow assay determined cellular immunity status by quantitative measurement of intracellular ATP level in CD4+ lymphocytes after PHA stimulation. Sera were analyzed for concentration of soluble CD30 reflecting primary allo-stimulation and for donor specific anti-HLA antibodies responsible for accelerated and refractory rejection. The results were correlated with clinical and pathological parameters and appraisal of predictive value was attempted. In Immuknow assay analysis ATP incremental changes indicative of rejection or infection were found in 75{\%} and in 50{\%} incidences, respectively. In stable patients, the ATP deviation from the preoperative baseline, indicative of stable engraftment, was much less pronounced than in other habitual clinical tests. CD30 concentrations were measured greatly above normal values prior to biopsy-proven rejection episodes, both before and after the transplant operation. Anti-HLA antibodies were elevated at a later stage, concurrently with clinical manifestation of graft failure and rejection. Anti-HLA antibody level remained negligible in patients going through a stable post-transplant clinical course. Overall, the utilization of the platform of combined biotechnologies could serve as a valuable tool for immune monitoring in organ transplantation, allowing for therapeutic intervention that can favorably affect the clinical outcome.},
- author = {Israeli, Moshe and Yussim, Alex and Mor, Eitan and Sredni, Benjamin and Klein, Tirza},
- doi = {10.1016/J.TRIM.2007.03.005},
- issn = {0966-3274},
- journal = {Transplant Immunology},
- month = jul,
- number = {1},
- pages = {7--12},
- publisher = {Elsevier},
- title = {{Preceeding the rejection: In search for a comprehensive post-transplant immune monitoring platform}},
- url = {https://www.sciencedirect.com/science/article/pii/S0966327407000755},
- volume = {18},
- year = {2007}
- }
- @article{Chicco2017,
- author = {Chicco, Davide and Grassi, Elena and Gonnella, Giorgio and Giacomoni, Franck and Clarke, Erik and Blankenberg, Daniel and Tran, Camy and Laurent, Sacha and Gopez, Matthew and Sennblad, Bengt and Baaijens, Jasmijn A and Ewels, Philip and Wright, Patrick R and Enache, Oana M and Roger, Pierrick and Dampier, Will and Koppstein, David and Devisetty, Upendra Kumar and Rausch, Tobias and Salatino, Adrian Emanuel and Seiler, Julien and Jung, Matthieu and Cumbo, Fabio and Moskalenko, Oleksandr and Bogema, Daniel R and Workentine, Matthew L and Newhouse, Stephen J and da Veiga Leprevost, Felipe and Arvai, Kevin},
- doi = {10.1101/207092},
- file = {:Users/ryan/Documents/Mendeley Desktop/Chicco et al. - 2017 - Bioconda A sustainable and comprehensive software distribution for the life sciences.pdf:pdf},
- journal = {bioRxiv},
- pages = {1--12},
- title = {{Bioconda: A sustainable and comprehensive software distribution for the life sciences}},
- url = {https://www.biorxiv.org/content/biorxiv/early/2017/10/21/207092.full.pdf},
- year = {2017}
- }
- @article{Sales2012a,
- abstract = {BACKGROUND: Gene set analysis is moving towards considering pathway topology as a crucial feature. Pathway elements are complex entities such as protein complexes, gene family members and chemical compounds. The conversion of pathway topology to a gene/protein networks (where nodes are a simple element like a gene/protein) is a critical and challenging task that enables topology-based gene set analyses. Unfortunately, currently available R/Bioconductor packages provide pathway networks only from single databases. They do not propagate signals through chemical compounds and do not differentiate between complexes and gene families.
- RESULTS: Here we present graphite, a Bioconductor package addressing these issues. Pathway information from four different databases is interpreted following specific biologically-driven rules that allow the reconstruction of gene-gene networks taking into account protein complexes, gene families and sensibly removing chemical compounds from the final graphs. The resulting networks represent a uniform resource for pathway analyses. Indeed, graphite provides easy access to three recently proposed topological methods. The graphite package is available as part of the Bioconductor software suite.
- CONCLUSIONS: graphite is an innovative package able to gather and make easily available the contents of the four major pathway databases. In the field of topological analysis graphite acts as a provider of biological information by reducing the pathway complexity considering the biological meaning of the pathway elements.},
- author = {Sales, Gabriele and Calura, Enrica and Cavalieri, Duccio and Romualdi, Chiara},
- doi = {10.1186/1471-2105-13-20},
- file = {:Users/ryan/Documents/Mendeley Desktop/Sales et al. - 2012 - graphite - a Bioconductor package to convert pathway topology to gene network.pdf:pdf},
- issn = {1471-2105},
- journal = {BMC Bioinformatics},
- keywords = {Gene Regulatory Networks,Humans,Insulin,Insulin: metabolism,Oligonucleotide Array Sequence Analysis,Oligonucleotide Array Sequence Analysis: methods,Receptor, Notch1,Receptor, Notch1: metabolism,Signal Transduction,Software},
- month = jan,
- number = {1},
- pages = {20},
- pmid = {22292714},
- publisher = {BioMed Central Ltd},
- title = {{graphite - a Bioconductor package to convert pathway topology to gene network}},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=3296647{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {13},
- year = {2012}
- }
- @article{Strimmer2008,
- abstract = {False discovery rate (FDR) methods play an important role in analyzing high-dimensional data. There are two types of FDR, tail area-based FDR and local FDR, as well as numerous statistical algorithms for estimating or controlling FDR. These differ in terms of underlying test statistics and procedures employed for statistical learning.},
- author = {Strimmer, Korbinian},
- doi = {10.1186/1471-2105-9-303},
- file = {:Users/ryan/Documents/Mendeley Desktop/Strimmer - 2008 - A unified approach to false discovery rate estimation.pdf:pdf;:Users/ryan/Documents/Mendeley Desktop/Strimmer - 2008 - A unified approach to false discovery rate estimation(2).pdf:pdf},
- issn = {1471-2105},
- journal = {BMC Bioinformatics},
- keywords = {Algorithms,Biometry,Biometry: methods,Breast Neoplasms,Breast Neoplasms: genetics,Confidence Intervals,Female,HIV,HIV: genetics,Humans,Likelihood Functions,Models,Oligonucleotide Array Sequence Analysis,Predictive Value of Tests,Sample Size,Software,Statistical},
- month = jan,
- number = {1},
- pages = {303},
- pmid = {18613966},
- title = {{A unified approach to false discovery rate estimation}},
- url = {http://www.biomedcentral.com/1471-2105/9/303},
- volume = {9},
- year = {2008}
- }
- @unpublished{Clark2013,
- author = {Clark, Tom S and Linzer, Drew A},
- file = {:Users/ryan/Documents/Mendeley Desktop/Clark, Linzer - 2013 - Should I Use Fixed or Random Effects.pdf:pdf},
- note = {Working paper},
- title = {{Should I Use Fixed or Random Effects?}},
- year = {2013}
- }
- @article{Kuwano2009,
- abstract = {The RNA-binding protein nuclear factor 90 (NF90) has been implicated in the stabilization, transport and translational control of several target mRNAs. However, a systematic analysis of NF90 target mRNAs has not been performed. Here, we use ribonucleoprotein immunoprecipitation analysis to identify a large subset of NF90-associated mRNAs. Comparison of the 3'-untranslated regions (UTRs) of these mRNAs led to the elucidation of a 25- to 30-nucleotide, RNA signature motif rich in adenines and uracils. Insertion of the AU-rich NF90 motif ('NF90m') in the 3'UTR of an EGFP heterologous reporter did not affect the steady-state level of the chimeric EGFP-NF90m mRNA or its cytosolic abundance. Instead, the translation of EGFP-NF90m mRNA was specifically repressed in an NF90-dependent manner, as determined by analysing nascent EGFP translation, the distribution of chimeric mRNAs on polysome gradients and the steady-state levels of expressed EGFP protein. The interaction of endogenous NF90 with target mRNAs was validated after testing both endogenous mRNAs and recombinant biotinylated transcripts containing NF90 motif hits. Further analysis showed that the stability of endogenous NF90 target mRNAs was not significantly influenced by NF90 abundance, while their translation increased when NF90 levels were reduced. In summary, we have identified an AU-rich RNA motif present in NF90 target mRNAs and have obtained evidence that NF90 represses the translation of this subset of mRNAs.},
- author = {Kuwano, Yuki and Pullmann, Rudolf and Marasa, Bernard S. and Abdelmohsen, Kotb and Lee, Eun Kyung and Yang, Xiaoling and Martindale, Jennifer L. and Zhan, Ming and Gorospe, Myriam},
- doi = {10.1093/nar/gkp861},
- file = {:Users/ryan/Documents/Mendeley Desktop/Kuwano et al. - 2009 - NF90 selectively represses the translation of target mRNAs bearing an AU-rich signature motif.pdf:pdf},
- issn = {0305-1048},
- journal = {Nucleic Acids Research},
- number = {1},
- pages = {225--238},
- pmid = {19850717},
- title = {{NF90 selectively represses the translation of target mRNAs bearing an AU-rich signature motif}},
- volume = {38},
- year = {2009}
- }
- @article{Chung2014,
- abstract = {The Aspergillus fumigatus sterol regulatory element binding protein (SREBP) SrbA belongs to the basic Helix-Loop-Helix (bHLH) family of transcription factors and is crucial for antifungal drug resistance and virulence. The latter phenotype is especially striking, as loss of SrbA results in complete loss of virulence in murine models of invasive pulmonary aspergillosis (IPA). How fungal SREBPs mediate fungal virulence is unknown, though it has been suggested that lack of growth in hypoxic conditions accounts for the attenuated virulence. To further understand the role of SrbA in fungal infection site pathobiology, chromatin immunoprecipitation followed by massively parallel DNA sequencing (ChIP-seq) was used to identify genes under direct SrbA transcriptional regulation in hypoxia. These results confirmed the direct regulation of ergosterol biosynthesis and iron uptake by SrbA in hypoxia and revealed new roles for SrbA in nitrate assimilation and heme biosynthesis. Moreover, functional characterization of an SrbA target gene with sequence similarity to SrbA identified a new transcriptional regulator of the fungal hypoxia response and virulence, SrbB. SrbB co-regulates genes involved in heme biosynthesis and demethylation of C4-sterols with SrbA in hypoxic conditions. However, SrbB also has regulatory functions independent of SrbA including regulation of carbohydrate metabolism. Loss of SrbB markedly attenuates A. fumigatus virulence, and loss of both SREBPs further reduces in vivo fungal growth. These data suggest that both A. fumigatus SREBPs are critical for hypoxia adaptation and virulence and reveal new insights into SREBPs' complex role in infection site adaptation and fungal virulence.},
- author = {Chung, Dawoon and Barker, Bridget M. and Carey, Charles C. and Merriman, Brittney and Werner, Ernst R. and Lechner, Beatrix E. and Dhingra, Sourabh and Cheng, Chao and Xu, Wenjie and Blosser, Sara J. and Morohashi, Kengo and Mazurie, Aur{\'{e}}lien and Mitchell, Thomas K. and Haas, Hubertus and Mitchell, Aaron P. and Cramer, Robert A.},
- doi = {10.1371/journal.ppat.1004487},
- file = {:Users/ryan/Documents/Mendeley Desktop/Chung et al. - 2014 - ChIP-seq and In Vivo Transcriptome Analyses of the Aspergillus fumigatus SREBP SrbA Reveals a New Regulator of the.pdf:pdf},
- issn = {1553-7374},
- journal = {PLoS Pathogens},
- number = {11},
- pages = {e1004487},
- pmid = {25375670},
- title = {{ChIP-seq and In Vivo Transcriptome Analyses of the Aspergillus fumigatus SREBP SrbA Reveals a New Regulator of the Fungal Hypoxia Response and Virulence}},
- url = {http://journals.plos.org/plospathogens/article/file?id=10.1371/journal.ppat.1004487{\&}type=printable},
- volume = {10},
- year = {2014}
- }
- @article{Ecker2012,
- abstract = {The Encyclopedia of DNA Elements (ENCODE) project dishes up a hearty banquet of data that illuminate the roles of the functional elements of the human genome. Here, six scientists describe the project and discuss how the data are influencing research directions across many fields. See Articles p.57, p.75, p.83, p.91, p.101 {\&} Letter p.109},
- author = {Ecker, Joseph R. and Bickmore, Wendy A. and Barroso, In{\^{e}}s and Pritchard, Jonathan K. and Gilad, Yoav and Segal, Eran},
- doi = {10.1038/489052a},
- file = {:Users/ryan/Documents/Mendeley Desktop/Ecker et al. - 2012 - Genomics ENCODE explained.pdf:pdf},
- issn = {0028-0836},
- journal = {Nature},
- number = {7414},
- pages = {52--55},
- pmid = {22955614},
- title = {{Genomics: ENCODE explained}},
- url = {http://www.nature.com/nature/journal/v489/n7414/pdf/489052a.pdf},
- volume = {489},
- year = {2012}
- }
- @article{Liang2012,
- abstract = {BACKGROUND: ChIP-seq has become an important tool for identifying genome-wide protein-DNA interactions, including transcription factor binding and histone modifications. In ChIP-seq experiments, ChIP samples are usually coupled with their matching control samples. Proper normalization between the ChIP and control samples is an essential aspect of ChIP-seq data analysis. RESULTS: We have developed a data-adaptive method for estimating the normalization factor between the ChIP and the control samples. Our method, named as NCIS (Normalization of ChIP-Seq) can accommodate both low and high sequencing depth datasets. We compare statistical properties of NCIS against existing methods in a set of diverse simulation settings, where NCIS enjoys the best estimation precision. In addition, we illustrate the impact of the normalization factor in FDR control and show that NCIS leads to more power among methods that control FDR at nominal levels. CONCLUSION: Our results indicate that the proper normalization between the ChIP and control samples is an important step in ChIP-seq analysis in terms of power and error rate control. Our proposed method shows excellent statistical properties and is useful in the full range of ChIP-seq applications, especially with deeply sequenced data.},
- author = {Liang, Kun and Keles, Sunduz},
- doi = {10.1186/1471-2105-13-199},
- file = {:Users/ryan/Documents/Mendeley Desktop/Liang, Keles - 2012 - Normalization of ChIP-seq data with control.pdf:pdf},
- issn = {1471-2105},
- journal = {BMC Bioinformatics},
- month = aug,
- number = {1},
- pages = {199},
- pmid = {22883957},
- title = {{Normalization of ChIP-seq data with control}},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/22883957},
- volume = {13},
- year = {2012}
- }
- @article{Shi2013,
- author = {Shi, Yang and Jiang, Hui},
- doi = {10.1371/journal.pone.0079448},
- editor = {Emmert-Streib, Frank},
- file = {:Users/ryan/Documents/Mendeley Desktop/Shi, Jiang - 2013 - rSeqDiff Detecting Differential Isoform Expression from RNA-Seq Data Using Hierarchical Likelihood Ratio Test.pdf:pdf},
- issn = {1932-6203},
- journal = {PLoS ONE},
- month = nov,
- number = {11},
- pages = {e79448},
- title = {{rSeqDiff: Detecting Differential Isoform Expression from RNA-Seq Data Using Hierarchical Likelihood Ratio Test}},
- url = {http://dx.plos.org/10.1371/journal.pone.0079448},
- volume = {8},
- year = {2013}
- }
- @article{Barbie2009,
- abstract = {The proto-oncogene KRAS is mutated in a wide array of human cancers, most of which are aggressive and respond poorly to standard therapies. Although the identification of specific oncogenes has led to the development of clinically effective, molecularly targeted therapies in some cases, KRAS has remained refractory to this approach. A complementary strategy for targeting KRAS is to identify gene products that, when inhibited, result in cell death only in the presence of an oncogenic allele. Here we have used systematic RNA interference to detect synthetic lethal partners of oncogenic KRAS and found that the non-canonical IkappaB kinase TBK1 was selectively essential in cells that contain mutant KRAS. Suppression of TBK1 induced apoptosis specifically in human cancer cell lines that depend on oncogenic KRAS expression. In these cells, TBK1 activated NF-kappaB anti-apoptotic signals involving c-Rel and BCL-XL (also known as BCL2L1) that were essential for survival, providing mechanistic insights into this synthetic lethal interaction. These observations indicate that TBK1 and NF-kappaB signalling are essential in KRAS mutant tumours, and establish a general approach for the rational identification of co-dependent pathways in cancer.},
- author = {Barbie, David A and Tamayo, Pablo and Boehm, Jesse S and Kim, So Young and Moody, Susan E and Dunn, Ian F and Schinzel, Anna C and Sandy, Peter and Meylan, Etienne and Scholl, Claudia and Fr{\"{o}}hling, Stefan and Chan, Edmond M and Sos, Martin L and Michel, Kathrin and Mermel, Craig and Silver, Serena J and Weir, Barbara A and Reiling, Jan H and Sheng, Qing and Gupta, Piyush B and Wadlow, Raymond C and Le, Hanh and Hoersch, Sebastian and Wittner, Ben S and Ramaswamy, Sridhar and Livingston, David M and Sabatini, David M and Meyerson, Matthew and Thomas, Roman K and Lander, Eric S and Mesirov, Jill P and Root, David E and Gilliland, D Gary and Jacks, Tyler and Hahn, William C},
- doi = {10.1038/nature08460},
- file = {:Users/ryan/Documents/Mendeley Desktop/Barbie et al. - 2009 - Systematic RNA interference reveals that oncogenic KRAS-driven cancers require TBK1.pdf:pdf},
- issn = {1476-4687},
- journal = {Nature},
- keywords = {Alleles,Apoptosis,Cell Line, Tumor,Cell Survival,Gene Expression Profiling,Genes, Lethal,Genes, ras,Genes, ras: genetics,Humans,Lung Neoplasms,Lung Neoplasms: genetics,Lung Neoplasms: metabolism,Lung Neoplasms: pathology,Neoplasms,Neoplasms: genetics,Neoplasms: metabolism,Neoplasms: pathology,Oncogene Protein p21(ras),Oncogene Protein p21(ras): genetics,Oncogene Protein p21(ras): metabolism,Protein-Serine-Threonine Kinases,Protein-Serine-Threonine Kinases: antagonists {\&} in,Protein-Serine-Threonine Kinases: metabolism,Proto-Oncogene Proteins c-rel,Proto-Oncogene Proteins c-rel: metabolism,RNA Interference,Signal Transduction,bcl-X Protein,bcl-X Protein: metabolism},
- month = nov,
- number = {7269},
- pages = {108--112},
- pmid = {19847166},
- title = {{Systematic RNA interference reveals that oncogenic KRAS-driven cancers require TBK1}},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=2783335{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {462},
- year = {2009}
- }
- @article{Jaitin2014,
- author = {Jaitin, D. A. and Kenigsberg, E. and Keren-Shaul, H. and Elefant, N. and Paul, F. and Zaretsky, I. and Mildner, A. and Cohen, N. and Jung, S. and Tanay, A. and Amit, I.},
- doi = {10.1126/science.1247651},
- file = {:Users/ryan/Documents/Mendeley Desktop/Jaitin et al. - 2014 - Massively Parallel Single-Cell RNA-Seq for Marker-Free Decomposition of Tissues into Cell Types(2).pdf:pdf;:Users/ryan/Documents/Mendeley Desktop/Jaitin et al. - 2014 - Massively Parallel Single-Cell RNA-Seq for Marker-Free Decomposition of Tissues into Cell Types.pdf:pdf},
- issn = {0036-8075},
- journal = {Science},
- month = feb,
- number = {6172},
- pages = {776--779},
- title = {{Massively Parallel Single-Cell RNA-Seq for Marker-Free Decomposition of Tissues into Cell Types}},
- url = {http://www.sciencemag.org/cgi/doi/10.1126/science.1247651},
- volume = {343},
- year = {2014}
- }
- @article{Trapnell2010,
- abstract = {High-throughput mRNA sequencing (RNA-Seq) promises simultaneous transcript discovery and abundance estimation. However, this would require algorithms that are not restricted by prior gene annotations and that account for alternative transcription and splicing. Here we introduce such algorithms in an open-source software program called Cufflinks. To test Cufflinks, we sequenced and analyzed {\textgreater}430 million paired 75-bp RNA-Seq reads from a mouse myoblast cell line over a differentiation time series. We detected 13,692 known transcripts and 3,724 previously unannotated ones, 62{\%} of which are supported by independent expression data or by homologous genes in other species. Over the time series, 330 genes showed complete switches in the dominant transcription start site (TSS) or splice isoform, and we observed more subtle shifts in 1,304 other genes. These results suggest that Cufflinks can illuminate the substantial regulatory flexibility and complexity in even this well-studied model of muscle development and that it can improve transcriptome-based genome annotation.},
- author = {Trapnell, Cole and Williams, Brian A and Pertea, Geo and Mortazavi, Ali and Kwan, Gordon and van Baren, Marijke J and Salzberg, Steven L and Wold, Barbara J and Pachter, Lior},
- doi = {10.1038/nbt.1621},
- file = {:Users/ryan/Documents/Mendeley Desktop/Trapnell et al. - 2010 - Transcript assembly and quantification by RNA-Seq reveals unannotated transcripts and isoform switching during.pdf:pdf;:Users/ryan/Documents/Mendeley Desktop/Trapnell et al. - 2010 - Transcript assembly and quantification by RNA-Seq reveals unannotated transcripts and isoform switching duri(2).pdf:pdf},
- issn = {1546-1696},
- journal = {Nature Biotechnology},
- keywords = {Algorithms,Animals,Cell Differentiation,Cell Differentiation: genetics,Cell Line,Gene Expression Profiling,Gene Expression Profiling: methods,Genome,Messenger,Messenger: analysis,Messenger: genetics,Messenger: metabolism,Mice,Oligonucleotide Array Sequence Analysis,Oligonucleotide Array Sequence Analysis: methods,Protein Isoforms,Protein Isoforms: genetics,Protein Isoforms: metabolism,Proto-Oncogene Proteins c-myc,Proto-Oncogene Proteins c-myc: genetics,Proto-Oncogene Proteins c-myc: metabolism,RNA,RNA: methods,Sequence Analysis,Software},
- month = may,
- number = {5},
- pages = {511--515},
- pmid = {20436464},
- publisher = {Nature Publishing Group},
- title = {{Transcript assembly and quantification by RNA-Seq reveals unannotated transcripts and isoform switching during cell differentiation.}},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=3146043{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {28},
- year = {2010}
- }
- @article{Chen2013,
- abstract = {Identifying transcription factors (TF) involved in producing a genome-wide transcriptional profile is an essential step in building mechanistic model that can explain observed gene expression data. We developed a statistical framework for constructing genome-wide signatures of TF activity, and for using such signatures in the analysis of gene expression data produced by complex transcriptional regulatory programs. Our framework integrates ChIP-seq data and appropriately matched gene expression profiles to identify True REGulatory (TREG) TF-gene interactions. It provides genome-wide quantification of the likelihood of regulatory TF-gene interaction that can be used to either identify regulated genes, or as genome-wide signature of TF activity. To effectively use ChIP-seq data, we introduce a novel statistical model that integrates information from all binding “peaks” within 2 Mb window around a gene's transcription start site (TSS), and provides gene-level binding scores and probabilities of regulatory interaction. In the second step we integrate these binding scores and regulatory probabilities with gene expression data to assess the likelihood of True REGulatory (TREG) TF-gene interactions. We demonstrate the advantages of TREG framework in identifying genes regulated by two TFs with widely different distribution of functional binding events (ER$\alpha$ and E2f1). We also show that TREG signatures of TF activity vastly improve our ability to detect involvement of ER$\alpha$ in producing complex diseases-related transcriptional profiles. Through a large study of disease-related transcriptional signatures and transcriptional signatures of drug activity, we demonstrate that increase in statistical power associated with the use of TREG signatures makes the crucial difference in identifying key targets for treatment, and drugs to use for treatment. All methods are implemented in an open-source R package treg. 
The package also contains all data used in the analysis including 494 TREG binding profiles based on ENCODE ChIP-seq data. The treg package can be downloaded at http://GenomicsPortals.org.},
- author = {Chen, Jing and Hu, Zhen and Phatak, Mukta and Reichard, John and Freudenberg, Johannes M. and Sivaganesan, Siva and Medvedovic, Mario},
- doi = {10.1371/journal.pcbi.1003198},
- editor = {Morris, Quaid},
- file = {:Users/ryan/Documents/Mendeley Desktop/Chen et al. - 2013 - Genome-Wide Signatures of Transcription Factor Activity Connecting Transcription Factors, Disease, and Small Molecu.pdf:pdf},
- issn = {1553-7358},
- journal = {PLoS Computational Biology},
- month = sep,
- number = {9},
- pages = {e1003198},
- title = {{Genome-Wide Signatures of Transcription Factor Activity: Connecting Transcription Factors, Disease, and Small Molecules}},
- url = {http://dx.plos.org/10.1371/journal.pcbi.1003198},
- volume = {9},
- year = {2013}
- }
- @article{Zisoulis2010,
- abstract = {MicroRNAs (miRNAs) regulate gene expression by guiding Argonaute proteins to specific target mRNA sequences. Identification of bona fide miRNA target sites in animals is challenging because of uncertainties regarding the base-pairing requirements between miRNA and target as well as the location of functional binding sites within mRNAs. Here we present the results of a comprehensive strategy aimed at isolating endogenous mRNA target sequences bound by the Argonaute protein ALG-1 in C. elegans. Using cross-linking and ALG-1 immunoprecipitation coupled with high-throughput sequencing (CLIP-seq), we identified extensive ALG-1 interactions with specific 3' untranslated region (UTR) and coding exon sequences and discovered features that distinguish miRNA complex binding sites in 3' UTRs from those in other genic regions. Furthermore, our analyses revealed a striking enrichment of Argonaute binding sites in genes important for miRNA function, suggesting an autoregulatory role that may confer robustness to the miRNA pathway.},
- author = {Zisoulis, Dimitrios G and Lovci, Michael T and Wilbert, Melissa L and Hutt, Kasey R and Liang, Tiffany Y and Pasquinelli, Amy E and Yeo, Gene W},
- doi = {10.1038/nsmb.1745},
- file = {:Users/ryan/Documents/Mendeley Desktop/Zisoulis et al. - 2010 - Comprehensive discovery of endogenous Argonaute binding sites in Caenorhabditis elegans.pdf:pdf},
- issn = {1545-9985},
- journal = {Nature Structural {\&} Molecular Biology},
- keywords = {Animals,Base Sequence,Binding Sites,Caenorhabditis elegans,Caenorhabditis elegans Proteins,Caenorhabditis elegans Proteins: metabolism,Caenorhabditis elegans: physiology,Chromatin Immunoprecipitation,Eukaryotic Initiation Factors,Eukaryotic Initiation Factors: metabolism,MicroRNAs,MicroRNAs: metabolism,Molecular Sequence Data,RNA, Helminth,RNA, Helminth: metabolism,RNA, Messenger,RNA, Messenger: metabolism,Sequence Analysis, DNA},
- month = feb,
- number = {2},
- pages = {173--179},
- pmid = {20062054},
- publisher = {Nature Publishing Group},
- title = {{Comprehensive discovery of endogenous Argonaute binding sites in Caenorhabditis elegans.}},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=2834287{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {17},
- year = {2010}
- }
- @article{VanGurp2013,
- abstract = {Priming of random hexamers in cDNA synthesis is known to show sequence bias, but in addition it has been suggested recently that mismatches in random hexamer priming could be a cause of mismatches between the original RNA fragment and observed sequence reads. To explore random hexamer mispriming as a potential source of these errors, we analyzed two independently generated RNA-seq datasets of synthetic ERCC spikes for which the reference is known. First strand cDNA synthesized by random hexamer priming on RNA showed consistent position and nucleotide-specific mismatch errors in the first seven nucleotides. The mismatch errors found in both datasets are consistent in distribution and thermodynamically stable mismatches are more common. This strongly indicates that RNA-DNA mispriming of specific random hexamers causes these errors. Due to their consistency and specificity, mispriming errors can have profound implications for downstream applications if not dealt with properly.},
- author = {van Gurp, Thomas P and McIntyre, Lauren M and Verhoeven, Koen J F},
- doi = {10.1371/journal.pone.0085583},
- file = {:Users/ryan/Documents/Mendeley Desktop/van Gurp, McIntyre, Verhoeven - 2013 - Consistent errors in first strand cDNA due to random hexamer mispriming.pdf:pdf},
- issn = {1932-6203},
- journal = {PLoS ONE},
- month = jan,
- number = {12},
- pages = {e85583},
- pmid = {24386481},
- title = {{Consistent errors in first strand cDNA due to random hexamer mispriming.}},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=3875578{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {8},
- year = {2013}
- }
- @article{Yokoi2018,
- abstract = {Most of type 2 diabetes (T2D) is thought to be the result of interaction between genetic and environmental factors. However, the genetic components discovered to date can explain only a small proportion of the observed heritability. The "missing heritability" may be accounted for by rare variants, gene-environment interactions, and epigenetics.},
- author = {Yokoi, Norihide},
- doi = {10.1111/jdi.12724},
- file = {:Users/ryan/Documents/Mendeley Desktop/Yokoi - 2018 - Epigenetic dysregulation in pancreatic islets and pathogenesis of type 2 diabetes.pdf:pdf},
- issn = {2040-1124},
- journal = {Journal of Diabetes Investigation},
- number = {3},
- pages = {475--477},
- title = {{Epigenetic dysregulation in pancreatic islets and pathogenesis of type 2 diabetes}},
- volume = {9},
- year = {2018}
- }
- @article{Benjamini2016,
- abstract = {Scientists use high-dimensional measurement assays to detect and prioritize regions of strong signal in a spatially organized domain. Examples include finding methylation enriched genomic regions using microarrays and identifying active cortical areas using brain-imaging. The most common procedure for detecting potential regions is to group together neighboring sites where the signal passed a threshold. However, one needs to account for the selection bias induced by this opportunistic procedure to avoid diminishing effects when generalizing to a population. In this paper, we present a model and a method that permit population inference for these detected regions. In particular, we provide non-asymptotic point and confidence interval estimates for mean effect in the region, which account for the local selection bias and the non-stationary covariance that is typical of these data. Such summaries allow researchers to better compare regions of different sizes and different correlation structures. Inference is provided within a conditional one-parameter exponential family for each region, with truncations that match the constraints of selection. A secondary screening-and-adjustment step allows pruning the set of detected regions, while controlling the false-coverage rate for the set of regions that are reported. We illustrate the benefits of the method by applying it to detected genomic regions with differing DNA-methylation rates across tissue types. Our method is shown to provide superior power compared to non-parametric approaches.},
- author = {Benjamini, Yuval and Taylor, Jonathan and Irizarry, Rafael A},
- doi = {10.1101/082321},
- file = {:Users/ryan/Documents/Mendeley Desktop/Benjamini, Taylor, Irizarry - 2016 - Selection Corrected Statistical Inference for Region Detection with High-throughput Assays.pdf:pdf;:Users/ryan/Documents/Mendeley Desktop/Benjamini, Taylor, Irizarry - 2016 - Selection Corrected Statistical Inference for Region Detection with High-throughput Assays(3).pdf:pdf},
- journal = {bioRxiv},
- month = oct,
- pages = {1--35},
- title = {{Selection Corrected Statistical Inference for Region Detection with High-throughput Assays}},
- url = {http://biorxiv.org/lookup/doi/10.1101/082321},
- year = {2016}
- }
- @article{Stegle2010,
- author = {Stegle, Oliver and Drewe, Philipp and Bohnert, Regina and Borgwardt, Karsten and R{\"a}tsch, Gunnar},
- file = {:Users/ryan/Documents/Mendeley Desktop/Stegle, Drewe, Bohnert - 2010 - Statistical tests for detecting differential RNA-transcript expression from read counts.pdf:pdf},
- journal = {Nature Precedings},
- pages = {1--11},
- title = {{Statistical tests for detecting differential RNA-transcript expression from read counts}},
- url = {http://www.researchgate.net/publication/43650353{\_}Statistical{\_}Tests{\_}for{\_}Detecting{\_}Differential{\_}RNA-Transcript{\_}Expression{\_}from{\_}Read{\_}Counts/file/79e4150b26217c633e.pdf},
- year = {2010}
- }
- @article{Leek2007,
- abstract = {It has unambiguously been shown that genetic, environmental, demographic, and technical factors may have substantial effects on gene expression levels. In addition to the measured variable(s) of interest, there will tend to be sources of signal due to factors that are unknown, unmeasured, or too complicated to capture through simple models. We show that failing to incorporate these sources of heterogeneity into an analysis can have widespread and detrimental effects on the study. Not only can this reduce power or induce unwanted dependence across genes, but it can also introduce sources of spurious signal to many genes. This phenomenon is true even for well-designed, randomized studies. We introduce "surrogate variable analysis" (SVA) to overcome the problems caused by heterogeneity in expression studies. SVA can be applied in conjunction with standard analysis techniques to accurately capture the relationship between expression and any modeled variables of interest. We apply SVA to disease class, time course, and genetics of gene expression studies. We show that SVA increases the biological accuracy and reproducibility of analyses in genome-wide expression studies.},
- author = {Leek, Jeffrey T. and Storey, John D.},
- doi = {10.1371/journal.pgen.0030161},
- nihmsid = {NIHMS150003},
- file = {:Users/ryan/Documents/Mendeley Desktop/Leek, Storey - 2007 - Capturing heterogeneity in gene expression studies by surrogate variable analysis.pdf:pdf},
- issn = {1553-7390},
- journal = {PLoS Genetics},
- keywords = {Algorithms,Breast Neoplasms,Breast Neoplasms: genetics,Computer Simulation,Data Interpretation, Statistical,Epigenesis, Genetic,Female,Gene Expression,Genes, BRCA1,Genes, BRCA2,Genetic Heterogeneity,Genetic Linkage,Genome, Fungal,Genome, Human,Humans,Kidney,Kidney: metabolism,Linear Models,Mutation,Oligonucleotide Array Sequence Analysis,Quantitative Trait, Heritable,Reproducibility of Results,Saccharomyces cerevisiae,Saccharomyces cerevisiae: genetics,Saccharomyces cerevisiae: metabolism,Time Factors},
- month = sep,
- number = {9},
- pages = {1724--1735},
- pmid = {17907809},
- title = {{Capturing heterogeneity in gene expression studies by surrogate variable analysis}},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=1994707{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {3},
- year = {2007}
- }
- @article{Reyes2013,
- abstract = {Alternative usage of exons provides genomes with plasticity to produce different transcripts from the same gene, modulating the function, localization, and life cycle of gene products. It affects most human genes. For a limited number of cases, alternative functions and tissue-specific roles are known. However, recent high-throughput sequencing studies have suggested that much alternative isoform usage across tissues is nonconserved, raising the question of the extent of its functional importance. We address this question in a genome-wide manner by analyzing the transcriptomes of five tissues for six primate species, focusing on exons that are 1:1 orthologous in all six species. Our results support a model in which differential usage of exons has two major modes: First, most of the exons show only weak differences, which are dominated by interspecies variability and may reflect neutral drift and noisy splicing. These cases dominate the genome-wide view and explain why conservation appears to be so limited. Second, however, a sizeable minority of exons show strong differences between tissues, which are mostly conserved. We identified a core set of 3,800 exons from 1,643 genes that show conservation of strongly tissue-dependent usage patterns from human at least to macaque. This set is enriched for exons encoding protein-disordered regions and untranslated regions. Our findings support the theory that isoform regulation is an important target of evolution in primates, and our method provides a powerful tool for discovering potentially functional tissue-dependent isoforms.},
- author = {Reyes, Alejandro and Anders, Simon and Weatheritt, Robert J and Gibson, Toby J and Steinmetz, Lars M and Huber, Wolfgang},
- doi = {10.1073/pnas.1307202110},
- file = {:Users/ryan/Documents/Mendeley Desktop/Reyes et al. - 2013 - Drift and conservation of differential exon usage across tissues in primate species.pdf:pdf},
- issn = {1091-6490},
- journal = {Proceedings of the National Academy of Sciences of the United States of America},
- month = sep,
- number = {38},
- pages = {15377--15382},
- pmid = {24003148},
- title = {{Drift and conservation of differential exon usage across tissues in primate species.}},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=3780897{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {110},
- year = {2013}
- }
- @article{Winn2010,
- abstract = {The use of mouse blood as a model for human blood is often considered in the development of clinically relevant, gene expression-based disease biomarkers. However, the ability to derive biologically meaningful insights from microarray-based gene expression patterns in mouse whole blood, as in human whole blood, is hindered by high levels of globin mRNA. In order to characterize the effects of globin reduction on gene expression of peripheral mouse blood, we performed gene set enrichment analysis on genes identified as expressed in blood via microarray-based genome-wide transcriptome analysis. Depletion of globin mRNA enhanced the quality of microarray data as shown by improved gene expression detection and increased sensitivity. Compared to genes expressed in whole blood, genes detected as expressed in blood following globin reduction were enriched for low abundance transcripts implicated in many biological pathways, including development, g-protein signaling, and immune response. Broadly, globin reduction resulted in improved detection of expressed genes that serve as molecular binding proteins and enzymes in cellular metabolism, intracellular transport/localization, transcription, and translation, as well as genes that potentially could act as biomarkers for diseases such as schizophrenia. These significantly enriched pathways overlap considerably with those identified in globin-reduced human blood suggesting that globin-reduced mouse blood gene expression studies may be useful for identifying genes relevant to human disease. Overall, the results of this investigation provide a better understanding of the impact of reducing globin transcripts in mouse blood and highlight the potential of microarray-based, globin-reduced, mouse blood gene expression studies in biomarker development.},
- author = {Winn, Mary E. and Zapala, Matthew A. and Hovatta, Iiris and Risbrough, Victoria B. and Lillie, Elizabeth and Schork, Nicholas J.},
- doi = {10.1007/s00335-010-9261-y},
- issn = {0938-8990},
- journal = {Mammalian Genome},
- month = jun,
- number = {5--6},
- pages = {268--275},
- title = {{The effects of globin on microarray-based gene expression analysis of mouse blood}},
- url = {http://link.springer.com/article/10.1007/s00335-010-9261-y},
- volume = {21},
- year = {2010}
- }
- @article{Love2015,
- abstract = {RNA-seq technology is widely used in biomedical and basic science research. These studies rely on complex computational methods that quantify expression levels for observed transcripts. We find that current computational methods can lead to hundreds of false positive results related to alternative isoform usage. This flaw in the current methodology stems from a lack of modeling sample-specific bias that leads to drops in coverage and is related to sequence features like fragment GC content and GC stretches. By incorporating features that explain this bias into transcript expression models, we greatly increase the specificity of transcript expression estimates, with more than a four-fold reduction in the number of false positives for reported changes in expression. We introduce alpine, a method for estimation of bias-corrected transcript abundance. The method is available as a Bioconductor package that includes data visualization tools useful for bias discovery.},
- author = {Love, Michael I and Hogenesch, John B and Irizarry, Rafael A},
- doi = {10.1101/025767},
- file = {:Users/ryan/Documents/Mendeley Desktop/Love, Hogenesch, Irizarry - 2015 - Modeling of RNA-seq fragment sequence bias reduces systematic errors in transcript abundance estimati.pdf:pdf},
- issn = {1087-0156},
- journal = {bioRxiv},
- pages = {025767},
- pmid = {27669167},
- title = {{Modeling of RNA-seq fragment sequence bias reduces systematic errors in transcript abundance estimation}},
- url = {http://biorxiv.org/content/early/2015/08/28/025767.abstract},
- year = {2015}
- }
- @article{Nowicka2016,
- author = {Nowicka, Malgorzata and Robinson, Mark D.},
- doi = {10.12688/f1000research.8900.1},
- file = {:Users/ryan/Documents/Mendeley Desktop/Nowicka, Robinson - 2016 - DRIMSeq a Dirichlet-multinomial framework for multivariate count outcomes in genomics.pdf:pdf},
- issn = {2046-1402},
- journal = {F1000Research},
- pages = {1356},
- title = {{DRIMSeq: a Dirichlet-multinomial framework for multivariate count outcomes in genomics}},
- url = {http://f1000research.com/articles/5-1356/v1},
- volume = {5},
- year = {2016}
- }
- @article{Rangaraju2015a,
- abstract = {Longevity mechanisms increase lifespan by counteracting the effects of aging. However, whether longevity mechanisms counteract the effects of aging continually throughout life, or whether they act during specific periods of life, preventing changes that precede mortality is unclear. Here, we uncover transcriptional drift, a phenomenon that describes how aging causes genes within functional groups to change expression in opposing directions. These changes cause a transcriptome-wide loss in mRNA stoichiometry and loss of co-expression patterns in aging animals, as compared to young adults. Using Caenorhabditis elegans as a model, we show that extending lifespan by inhibiting serotonergic signals by the antidepressant mianserin attenuates transcriptional drift, allowing the preservation of a younger transcriptome into an older age. Our data are consistent with a model in which inhibition of serotonergic signals slows age-dependent physiological decline and the associated rise in mortality levels exclusively in young adults, thereby postponing the onset of major mortality.},
- author = {Rangaraju, Sunitha and Solis, Gregory M. and Thompson, Ryan C. and Gomez-Amaro, Rafael L. and Kurian, Leo and Encalada, Sandra E. and Niculescu, Alexander B. and Salomon, Daniel R. and Petrascheck, Michael},
- doi = {10.7554/eLife.08833},
- file = {:Users/ryan/Documents/Mendeley Desktop/Rangaraju et al. - 2015 - Suppression of transcriptional drift extends C. elegans lifespan by postponing the onset of mortality.pdf:pdf;:Users/ryan/Documents/Mendeley Desktop/Rangaraju et al. - 2015 - Suppression of transcriptional drift extends C. elegans lifespan by postponing the onset of mortality.pdf:pdf;:Users/ryan/Documents/Mendeley Desktop/Rangaraju et al. - 2015 - Suppression of transcriptional drift extends C. elegans lifespan by postponing the onset of mortality(3).pdf:pdf},
- issn = {2050-084X},
- journal = {eLife},
- month = dec,
- pages = {1--39},
- pmid = {26623667},
- title = {{Suppression of transcriptional drift extends C. elegans lifespan by postponing the onset of mortality}},
- url = {https://elifesciences.org/articles/08833},
- volume = {4},
- year = {2015}
- }
- @incollection{Chen2014,
- abstract = {This article reviews the statistical theory underlying the edgeR software package for differential expression of RNA-seq data. Negative binomial models are used to capture the quadratic mean-variance relationship that can be observed in RNA-seq data. Conditional likelihood methods are used to avoid bias when estimating the level of variation. Empirical Bayes methods are used to allow gene-specific variation estimates even when the number of replicate samples is very small. Generalized linear models are used to accommodate arbitrarily complex designs. A key feature of the edgeR package is the use of weighted likelihood methods to implement a flexible empirical Bayes approach in the absence of easily tractable sampling distributions. The methodology is implemented in flexible software that is easy to use even for users who are not professional statisticians or bioinformaticians. The software is part of the Bioconductor project.},
- address = {Cham},
- author = {Chen, Yunshun and Lun, Aaron T L and Smyth, Gordon K},
- booktitle = {Statistical Analysis of Next Generation Sequencing Data},
- doi = {10.1007/978-3-319-07212-8_3},
- editor = {Datta, Somnath and Nettleton, Dan},
- file = {:Users/ryan/Documents/Mendeley Desktop/Chen, Lun, Smyth - 2014 - Differential Expression Analysis of Complex RNA-seq Experiments Using edgeR.pdf:pdf},
- isbn = {978-3-319-07212-8},
- pages = {51--74},
- publisher = {Springer International Publishing},
- title = {{Differential Expression Analysis of Complex RNA-seq Experiments Using edgeR}},
- url = {http://link.springer.com/10.1007/978-3-319-07212-8{\_}3},
- year = {2014}
- }
- @article{Si2013a,
- abstract = {MOTIVATION: RNA-seq technology has been widely adopted as an attractive alternative to microarray-based methods to study global gene expression. However, robust statistical tools to analyze these complex datasets are still lacking. By grouping genes with similar expression profiles across treatments, cluster analysis provides insight into gene functions and networks and hence is an important technique for RNA-seq data analysis.
- RESULTS: In this manuscript, we derive clustering algorithms based on appropriate probability models for RNA-seq data. An Expectation-Maximization (EM) algorithm and another two stochastic versions of EM algorithms are described. In addition, a strategy for initialization based on likelihood is proposed to improve the clustering algorithms. Moreover, we present a model-based hybrid-hierarchical clustering method to generate a tree structure that allows visualization of relationships among clusters as well as flexibility of choosing the number of clusters. Results from both simulation studies and analysis of a maize RNA-seq dataset show that our proposed methods provide better clustering results than alternative methods such as the K-means algorithm and hierarchical clustering methods that are not based on probability models.
- AVAILABILITY: An R package, MBCluster.Seq, has been developed to implement our proposed algorithms. This R package provides fast computation and is publicly available at http://www.r-project.org.
- CONTACT: siyaqing@iastate.edu and pliu@iastate.edu.},
- author = {Si, Yaqing and Liu, Peng and Li, Pinghua and Brutnell, Thomas P},
- doi = {10.1093/bioinformatics/btt632},
- file = {:Users/ryan/Documents/Mendeley Desktop/Si et al. - 2013 - Model-Based Clustering for RNA-Seq Data.pdf:pdf},
- issn = {1367-4811},
- journal = {Bioinformatics},
- month = nov,
- pages = {1--9},
- pmid = {24191069},
- title = {{Model-Based Clustering for RNA-Seq Data.}},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/24191069},
- year = {2013}
- }
- @article{Weng2006,
- abstract = {MOTIVATION: In microarray gene expression studies, the number of replicated microarrays is usually small because of cost and sample availability, resulting in unreliable variance estimation and thus unreliable statistical hypothesis tests. The unreliable variance estimation is further complicated by the fact that the technology-specific variance is intrinsically intensity-dependent.
- RESULTS: The Rosetta error model captures the variance-intensity relationship for various types of microarray technologies, such as single-color arrays and two-color arrays. This error model conservatively estimates intensity error and uses this value to stabilize the variance estimation. We present two commonly used error models: the intensity error-model for single-color microarrays and the ratio error model for two-color microarrays or ratios built from two single-color arrays. We present examples to demonstrate the strength of our error models in improving statistical power of microarray data analysis, particularly, in increasing expression detection sensitivity and specificity when the number of replicates is limited.},
- author = {Weng, Lee and Dai, Hongyue and Zhan, Yihui and He, Yudong and Stepaniants, Sergey B and Bassett, Douglas E},
- doi = {10.1093/bioinformatics/btl045},
- file = {:Users/ryan/Documents/Mendeley Desktop/Weng et al. - 2006 - Rosetta error model for gene expression analysis.pdf:pdf},
- issn = {1367-4803},
- journal = {Bioinformatics},
- keywords = {Algorithms,Analysis of Variance,Computer Simulation,Data Interpretation, Statistical,Gene Expression,Gene Expression Profiling,Gene Expression Profiling: methods,Gene Expression: physiology,Genetic Variation,Models, Genetic,Models, Statistical,Oligonucleotide Array Sequence Analysis,Oligonucleotide Array Sequence Analysis: methods,Reproducibility of Results,Sensitivity and Specificity},
- month = may,
- number = {9},
- pages = {1111--1121},
- pmid = {16522673},
- title = {{Rosetta error model for gene expression analysis.}},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/16522673},
- volume = {22},
- year = {2006}
- }
- @article{Zachariah2018,
- abstract = {OBJECTIVES: Prognostic implications of early protocol biopsies have been studied; however, the value of late protocol biopsy in predicting graft outcome has not been well defined. Here, we compared the effects of early and late protocol biopsy histologic findings in stable kidney allografts and aimed to understand the significance of "borderline" rejection on allograft function. MATERIALS AND METHODS: We studied 261 biopsies from 159 renal transplant recipients who were on a steroid-free, calcineurin inhibitor and mycophenolate mofetil regimen and who received transplants between 2004 and 2012 with mean follow-up of 5 years. Early (between 3 and 9 mo) and subsequent late (between 12 and 24 mo) protocol biopsies were performed. Biopsies were classified as normal, interstitial fibrosis and/or tubular atrophy, subclinical acute rejection with interstitial fibrosis and/or tubular atrophy, and borderline rejection with interstitial fibrosis and/or tubular atrophy. A linear mixed-effects model was used to determine the effects of early and late protocol biopsies on estimated glomerular filtration rate changes, with baseline time for estimated glomerular filtration rate fixed at 12 months. RESULTS: The adjusted model showed that estimated glomerular filtration rate at 3 months, donor age, delayed graft function, and early protocol biopsies were associated with baseline estimated glomerular filtration rate at 12 months. Estimated glomerular filtration rate changes over time were associated with findings of interstitial fibrosis and/or tubular atrophy at early biopsy and subclinical acute rejection and borderline rejection at late biopsy. At last follow-up, final estimated glomerular filtration rate was significantly associated with interstitial fibrosis and/or tubular atrophy at early biopsy and with subclinical acute rejection at late biopsy. 
CONCLUSIONS: Although early protocol biopsy predicted baseline estimated glomerular filtration rate, late biopsy was important for predicting changes in function over time. In addition, a diagnosis of "borderline" rejection on protocol biopsies predicted long-term graft function.},
- author = {Zachariah, Mareena S. and Dwivedi, Alok K. and Yip, Cindy S. and Chang, Shirley S. and Gundroo, Aijaz and Venuto, Rocco C. and Tomaszewski, John and Patel, Sunil K. and Sharma, Rajeev},
- doi = {10.6002/ect.2016.0323},
- file = {:Users/ryan/Documents/Mendeley Desktop/Zachariah et al. - 2018 - Utility of serial protocol biopsies performed after 1 year in predicting long-term kidney allograft function a.pdf:pdf},
- issn = {1304-0855},
- journal = {Experimental and Clinical Transplantation},
- keywords = {Borderline rejection,Interstitial fibrosis and/or tubular atrophy,Subclinical acute rejection,Surveillance biopsies},
- number = {4},
- pages = {391--400},
- title = {{Utility of serial protocol biopsies performed after 1 year in predicting long-term kidney allograft function according to histologic phenotype}},
- volume = {16},
- year = {2018}
- }
- @phdthesis{Phipson2013Thesis,
- abstract = {New biotechnology developments such as the microarray, and more recently, next generation sequencing, have necessitated the need for new statistical methodologies to be developed. These methods are designed to combat unique issues present in the data generated by these technologies. They provide the perfect environment for information sharing strategies, such as empirical Bayes methods, due to the large numbers of simulataneous tests performed. We explore different estimators of the proportion of true null hypotheses and develop a fast and accurate estimator which is valid for any number of p-values. This estimator is based on local false discovery rates and is used in several of the proceeding sections. Another interest is in developing robust hyper-parameter estimators in an empirical Bayes hierarchical model setting. An estimator for the prior degrees of freedom which is robust to outliers is developed using two different approaches. This has the effect that highly variable genes are unlikely to be significantly differentially expressed, as well as increasing power to detect differential expression. The second half of the thesis focuses on gaining more information from the log fold changes obtained from microarray and sequencing experiments. More accurate log fold changes are developed for microarrays and RNA sequencing data, which provide additional information for ranking top differentially expressed genes. The new measure, called predictive log fold change, arises from the posterior distribution of the log fold changes. The relationship between two gene expression profiles is quantified when the p-values obtained from testing two hypotheses are not independent. This arises when two genotypes are compared to a common control group. The method is based on separating the true biological correlation from the technical correlation of the log fold changes. 
The hyperparameters of the prior distribution for the log fold changes need to be estimated in order to get an estimate of the biological correlation. This is possible since we show that the two dependent moderated t statistics have a scaled multivariate t distribution. The methods developed in this thesis are tested using simulations and applied to data sets collected in collaboration with biologists at The Walter and Eliza Hall Institute of Medical Research.},
- author = {Phipson, Belinda},
- keywords = {bayesian analysis,bioinformatics,empirical bayes,gene expression,microarrays,sequencing},
- language = {eng},
- school = {The Walter and Eliza Hall Institute of Medical Research {\&} The University of Melbourne},
- title = {{Empirical Bayes modelling of expression profiles and their associations}},
- url = {http://hdl.handle.net/11343/38162},
- year = {2013}
- }
- @article{Sun2013,
- abstract = {BACKGROUND: Differential expression analysis based on "next-generation" sequencing technologies is a fundamental means of studying RNA expression. We recently developed a multi-step normalization method (called TbT) for two-group RNA-seq data with replicates and demonstrated that the statistical methods available in four R packages (edgeR, DESeq, baySeq, and NBPSeq) together with TbT can produce a well-ranked gene list in which true differentially expressed genes (DEGs) are top-ranked and non-DEGs are bottom ranked. However, the advantages of the current TbT method come at the cost of a huge computation time. Moreover, the R packages did not have normalization methods based on such a multi-step strategy.
- RESULTS: TCC (an acronym for Tag Count Comparison) is an R package that provides a series of functions for differential expression analysis of tag count data. The package incorporates multi-step normalization methods, whose strategy is to remove potential DEGs before performing the data normalization. The normalization function based on this DEG elimination strategy (DEGES) includes (i) the original TbT method based on DEGES for two-group data with or without replicates, (ii) much faster methods for two-group data with or without replicates, and (iii) methods for multi-group comparison. TCC provides a simple unified interface to perform such analyses with combinations of functions provided by edgeR, DESeq, and baySeq. Additionally, a function for generating simulation data under various conditions and alternative DEGES procedures consisting of functions in the existing packages are provided. Bioinformatics scientists can use TCC to evaluate their methods, and biologists familiar with other R packages can easily learn what is done in TCC.
- CONCLUSION: DEGES in TCC is essential for accurate normalization of tag count data, especially when up- and down-regulated DEGs in one of the samples are extremely biased in their number. TCC is useful for analyzing tag count data in various scenarios ranging from unbiased to extremely biased differential expression. TCC is available at http://www.iu.a.u-tokyo.ac.jp/{\~{}}kadota/TCC/ and will appear in Bioconductor (http://bioconductor.org/) from ver. 2.13.},
- author = {Sun, Jianqiang and Nishiyama, Tomoaki and Shimizu, Kentaro and Kadota, Koji},
- doi = {10.1186/1471-2105-14-219},
- file = {:Users/ryan/Documents/Mendeley Desktop/Sun et al. - 2013 - TCC an R package for comparing tag count data with robust normalization strategies.pdf:pdf},
- issn = {1471-2105},
- journal = {BMC Bioinformatics},
- month = {jan},
- number = {1},
- pages = {219},
- pmid = {23837715},
- publisher = {BMC Bioinformatics},
- title = {{TCC: an R package for comparing tag count data with robust normalization strategies.}},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=3716788{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {14},
- year = {2013}
- }
- @article{Yi2017,
- author = {Yi, Haidong and Raman, Ayush T and Zhang, Han and Allen, Genevera I and Liu, Zhandong},
- doi = {10.1093/bioinformatics/btx635},
- file = {:Users/ryan/Documents/Mendeley Desktop/Yi et al. - 2017 - Detecting hidden batch factors through data-adaptive adjustment for biological effects.pdf:pdf},
- issn = {1367-4803},
- journal = {Bioinformatics},
- number = {7},
- pages = {1141--1147},
- pmid = {29617963},
- title = {{Detecting hidden batch factors through data-adaptive adjustment for biological effects}},
- url = {http://academic.oup.com/bioinformatics/advance-article/doi/10.1093/bioinformatics/btx635/4386916},
- volume = {34},
- year = {2017}
- }
- @misc{Winter2014,
- abstract = {This tutorial serves as a quick boot camp to jump-start your own analyses with linear mixed effects models. This text is different from other introductions by being decidedly conceptual; I will focus on why you want to use mixed models and how you should use them. While many introductions to this topic can be very daunting to readers who lack the appropriate statistical background, this text is going to be a softer kind of introduction{\ldots} so, don't panic! The},
- author = {Winter, Bodo},
- file = {:Users/ryan/Documents/Mendeley Desktop/Winter - 2014 - A very basic tutorial for performing linear mixed effects analyses.pdf:pdf},
- title = {{A very basic tutorial for performing linear mixed effects analyses}},
- url = {http://www.bodowinter.com/tutorial/bw{\_}LME{\_}tutorial2.pdf},
- year = {2014}
- }
- @article{Chen2007,
- abstract = {Orthology detection is critically important for accurate functional annotation, and has been widely used to facilitate studies on comparative and evolutionary genomics. Although various methods are now available, there has been no comprehensive analysis of performance, due to the lack of a genomic-scale 'gold standard' orthology dataset. Even in the absence of such datasets, the comparison of results from alternative methodologies contains useful information, as agreement enhances confidence and disagreement indicates possible errors. Latent Class Analysis (LCA) is a statistical technique that can exploit this information to reasonably infer sensitivities and specificities, and is applied here to evaluate the performance of various orthology detection methods on a eukaryotic dataset. Overall, we observe a trade-off between sensitivity and specificity in orthology detection, with BLAST-based methods characterized by high sensitivity, and tree-based methods by high specificity. Two algorithms exhibit the best overall balance, with both sensitivity and specificity{\textgreater}80{\%}: INPARANOID identifies orthologs across two species while OrthoMCL clusters orthologs from multiple species. Among methods that permit clustering of ortholog groups spanning multiple genomes, the (automated) OrthoMCL algorithm exhibits better within-group consistency with respect to protein function and domain architecture than the (manually curated) KOG database, and the homolog clustering algorithm TribeMCL as well. By way of using LCA, we are also able to comprehensively assess similarities and statistical dependence between various strategies, and evaluate the effects of parameter settings on performance. In summary, we present a comprehensive evaluation of orthology detection on a divergent set of eukaryotic genomes, thus providing insights and guides for method selection, tuning and development for different applications. 
Many biological questions have been addressed by multiple tests yielding binary (yes/no) outcomes but no clear definition of truth, making LCA an attractive approach for computational biology.},
- author = {Chen, Feng and Mackey, Aaron J and Vermunt, Jeroen K and Roos, David S},
- doi = {10.1371/journal.pone.0000383},
- file = {:Users/ryan/Documents/Mendeley Desktop/Chen et al. - 2007 - Assessing performance of orthology detection strategies applied to eukaryotic genomes.pdf:pdf},
- issn = {1932-6203},
- journal = {PLoS ONE},
- keywords = {Algorithms,Eukaryotic Cells,Genome},
- month = {jan},
- number = {4},
- pages = {e383},
- pmid = {17440619},
- title = {{Assessing performance of orthology detection strategies applied to eukaryotic genomes.}},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=1849888{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {2},
- year = {2007}
- }
- @article{Harta,
- author = {Hart, G Traver and Komori, H Kiyomi and Lamere, Sarah A and Podshivalova, Katie and Grigoriev, Yevgeniy A and Salomon, Daniel R},
- file = {:Users/ryan/Documents/Mendeley Desktop/Hart et al. - Unknown - No Title.pdf:pdf},
- title = {{No Title}}
- }
- @article{Bray2016,
- archivePrefix = {arXiv},
- arxivId = {1505.02710},
- author = {Bray, Nicolas L and Pimentel, Harold and Melsted, P{\'{a}}ll and Pachter, Lior},
- doi = {10.1038/nbt.3519},
- eprint = {1505.02710},
- file = {:Users/ryan/Documents/Mendeley Desktop/Bray et al. - 2016 - Near-optimal probabilistic RNA-seq quantification.pdf:pdf},
- isbn = {1546-1696 (Electronic); 1087-0156 (Linking)},
- issn = {1087-0156},
- journal = {Nature Biotechnology},
- number = {5},
- pages = {525--527},
- pmid = {27043002},
- title = {{Near-optimal probabilistic RNA-seq quantification}},
- url = {http://www.nature.com/doifinder/10.1038/nbt.3519},
- volume = {34},
- year = {2016}
- }
- @article{Irizarry2003a,
- abstract = {In this paper we report exploratory analyses of high-density oligonucleotide array data from the Affymetrix GeneChip system with the objective of improving upon currently used measures of gene expression. Our analyses make use of three data sets: a small experimental study consisting of five MGU74A mouse GeneChip arrays, part of the data from an extensive spike-in study conducted by Gene Logic and Wyeth's Genetics Institute involving 95 HG-U95A human GeneChip arrays; and part of a dilution study conducted by Gene Logic involving 75 HG-U95A GeneChip arrays. We display some familiar features of the perfect match and mismatch probe (PM and MM) values of these data, and examine the variance-mean relationship with probe-level data from probes believed to be defective, and so delivering noise only. We explain why we need to normalize the arrays to one another using probe level intensities. We then examine the behavior of the PM and MM using spike-in data and assess three commonly used summary measures: Affymetrix's (i) average difference (AvDiff) and (ii) MAS 5.0 signal, and (iii) the Li and Wong multiplicative model-based expression index (MBEI). The exploratory data analyses of the probe level data motivate a new summary measure that is a robust multi-array average (RMA) of background-adjusted, normalized, and log-transformed PM values. We evaluate the four expression summary measures using the dilution study data, assessing their behavior in terms of bias, variance and (for MBEI and RMA) model fit. Finally, we evaluate the algorithms in terms of their ability to detect known levels of differential expression using the spike-in data. We conclude that there is no obvious downside to using RMA and attaching a standard error (SE) to this quantity using a linear model which removes probe-specific affinities.},
- author = {Irizarry, Rafael A and Hobbs, Bridget and Collin, Francois and Beazer-Barclay, Yasmin D and Antonellis, Kristen J and Scherf, Uwe and Speed, Terence P},
- doi = {10.1093/biostatistics/4.2.249},
- file = {:Users/ryan/Documents/Mendeley Desktop/Irizarry et al. - 2003 - Exploration, normalization, and summaries of high density oligonucleotide array probe level data.pdf:pdf},
- issn = {1465-4644},
- journal = {Biostatistics (Oxford, England)},
- keywords = {Algorithms,Animals,DNA Probes,DNA Probes: genetics,Data Interpretation, Statistical,Gene Expression Profiling,Gene Expression Profiling: statistics {\&} numerical,Humans,Linear Models,Mice,Normal Distribution,Oligonucleotide Array Sequence Analysis,Oligonucleotide Array Sequence Analysis: methods,Reproducibility of Results,Statistics, Nonparametric},
- month = {apr},
- number = {2},
- pages = {249--264},
- pmid = {12925520},
- title = {{Exploration, normalization, and summaries of high density oligonucleotide array probe level data.}},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/12925520},
- volume = {4},
- year = {2003}
- }
- @article{Remus2009,
- abstract = {The initiation of chromosomal DNA replication involves initiator proteins that recruit and load hexameric DNA helicases at replication origins. This helicase loading step is tightly regulated in bacteria and eukaryotes. In contrast to the situation in bacteria, the eukaryotic helicase is loaded in an inactive form. This extra 'lock and load' mechanism in eukaryotes allows regulation of a second step, helicase activation. The temporal separation of helicase loading and activation is crucial for the coordination of DNA replication with cell growth and extracellular signals, the prevention of re-replication and the control of origin activity in response to replication stress. Initiator proteins in bacteria and eukaryotes are structurally homologous; yet the replicative helicases they load are unrelated. Understanding how these helicases are loaded and how they act during unwinding may have important implications for understanding how DNA replication is regulated in different domains of life.},
- author = {Remus, Dirk and Diffley, John F X},
- doi = {10.1016/j.ceb.2009.08.002},
- file = {:Users/ryan/Documents/Mendeley Desktop/Remus, Diffley - 2009 - Eukaryotic DNA replication control lock and load, then fire.pdf:pdf},
- issn = {1879-0410},
- journal = {Current Opinion in Cell Biology},
- keywords = {Animals,Cell Cycle,Cell Cycle Proteins,Cell Cycle Proteins: genetics,Cell Cycle Proteins: metabolism,Cell Cycle: genetics,DNA,DNA Helicases,DNA Helicases: genetics,DNA Helicases: metabolism,DNA Replication,DNA: chemistry,DNA: metabolism,DnaB Helicases,DnaB Helicases: genetics,DnaB Helicases: metabolism,Eukaryota,Eukaryota: genetics,Eukaryota: physiology,Humans,Models, Biological,Saccharomyces cerevisiae Proteins,Saccharomyces cerevisiae Proteins: genetics,Saccharomyces cerevisiae Proteins: metabolism},
- month = {dec},
- number = {6},
- pages = {771--777},
- pmid = {19767190},
- title = {{Eukaryotic DNA replication control: lock and load, then fire.}},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/19767190},
- volume = {21},
- year = {2009}
- }
- @article{Xu2012,
- author = {Xu, H and Sung, WK},
- doi = {10.1007/978-1-61779-400-1{\_}19},
- file = {:Users/ryan/Documents/Mendeley Desktop/Xu, Sung - 2012 - Identifying Differential Histone Modification Sites from ChIP‐seq Data.pdf:pdf},
- journal = {Next Generation Microarray Bioinformatics},
- keywords = {chip-seq,chipdiff,differential histone modification site,epigenetic modification},
- number = {1},
- pages = {293--303},
- title = {{Identifying Differential Histone Modification Sites from ChIP-seq Data}},
- url = {http://link.springer.com/protocol/10.1007/978-1-61779-400-1{\_}19},
- volume = {802},
- year = {2012}
- }
- @article{Efron1983,
- abstract = {We construct a prediction rule on the basis of some data, and then wish to estimate the error rate of this rule in classifying future observations. Cross-validation provides a nearly unbiased estimate, using only the original data. Cross-validation turns out to be related ...},
- author = {Efron, Bradley},
- doi = {10.1080/01621459.1983.10477973},
- file = {:Users/ryan/Documents/Mendeley Desktop/Efron - 1983 - Estimating the Error Rate of a Prediction Rule Improvement on Cross-Validation.pdf:pdf},
- isbn = {01621459},
- issn = {0162-1459},
- journal = {Journal of the American Statistical Association},
- keywords = {anova,bootstrap,decomposition,logistic regression,prediction problem},
- number = {382},
- pages = {316--331},
- title = {{Estimating the Error Rate of a Prediction Rule: Improvement on Cross-Validation}},
- url = {http://www.cs.berkeley.edu/{~}jordan/sail/readings/archive/efron-improve{\_}cv.pdf http://www.jstor.org/stable/2288636?origin=crossref{\%}5Cnpapers3://publication/doi/10.2307/2288636},
- volume = {78},
- year = {1983}
- }
- @article{Houseman2012,
- abstract = {BACKGROUND: There has been a long-standing need in biomedical research for a method that quantifies the normally mixed composition of leukocytes beyond what is possible by simple histological or flow cytometric assessments. The latter is restricted by the labile nature of protein epitopes, requirements for cell processing, and timely cell analysis. In a diverse array of diseases and following numerous immune-toxic exposures, leukocyte composition will critically inform the underlying immuno-biology to most chronic medical conditions. Emerging research demonstrates that DNA methylation is responsible for cellular differentiation, and when measured in whole peripheral blood, serves to distinguish cancer cases from controls. RESULTS: Here we present a method, similar to regression calibration, for inferring changes in the distribution of white blood cells between different subpopulations (e.g. cases and controls) using DNA methylation signatures, in combination with a previously obtained external validation set consisting of signatures from purified leukocyte samples. We validate the fundamental idea in a cell mixture reconstruction experiment, then demonstrate our method on DNA methylation data sets from several studies, including data from a Head and Neck Squamous Cell Carcinoma (HNSCC) study and an ovarian cancer study. Our method produces results consistent with prior biological findings, thereby validating the approach. CONCLUSIONS: Our method, in combination with an appropriate external validation set, promises new opportunities for large-scale immunological studies of both disease states and noxious exposures.},
- author = {Houseman, Eugene Andres and Accomando, William P and Koestler, Devin C and Christensen, Brock C and Marsit, Carmen J and Nelson, Heather H and Wiencke, John K and Kelsey, Karl T},
- doi = {10.1186/1471-2105-13-86},
- file = {:Users/ryan/Documents/Mendeley Desktop/Houseman et al. - 2012 - DNA methylation arrays as surrogate measures of cell mixture distribution.pdf:pdf},
- isbn = {1471-2105 (Electronic); 1471-2105 (Linking)},
- issn = {1471-2105},
- journal = {BMC Bioinformatics},
- keywords = {Computer Simulation,DNA Methylation,Data Interpretation, Statistical,Down Syndrome,Down Syndrome: blood,Down Syndrome: diagnosis,Down Syndrome: immunology,Epigenesis, Genetic,Female,Gene Expression Profiling,Head and Neck Neoplasms,Head and Neck Neoplasms: blood,Head and Neck Neoplasms: diagnosis,Head and Neck Neoplasms: immunology,Humans,Leukocyte Count,Leukocyte Count: methods,Leukocytes,Leukocytes: immunology,Obesity,Obesity: blood,Obesity: genetics,Obesity: immunology,Oligonucleotide Array Sequence Analysis,Oligonucleotide Array Sequence Analysis: statistic,Ovarian Neoplasms,Ovarian Neoplasms: blood,Ovarian Neoplasms: diagnosis,Ovarian Neoplasms: immunology},
- number = {1},
- pages = {86},
- pmid = {22568884},
- title = {{DNA methylation arrays as surrogate measures of cell mixture distribution.}},
- url = {http://www.biomedcentral.com/1471-2105/13/86},
- volume = {13},
- year = {2012}
- }
- @article{Klein2014,
- abstract = {MOTIVATION: Histone modifications are a key epigenetic mechanism to activate or repress the transcription of genes. Datasets of matched transcription data and histone modification data obtained by ChIP-seq exist, but methods for integrative analysis of both data types are still rare. Here, we present a novel bioinformatics approach to detect genes that show different transcript abundances between two conditions putatively caused by alterations in histone modification. RESULTS: We introduce a correlation measure for integrative analysis of ChIP-seq and gene transcription data measured by RNA sequencing or microarrays and demonstrate that a proper normalization of ChIP-seq data is crucial. We suggest applying Bayesian mixture models of different types of distributions to further study the distribution of the correlation measure. The implicit classification of the mixture models is used to detect genes with differences between two conditions in both gene transcription and histone modification. The method is applied to different datasets, and its superiority to a naive separate analysis of both data types is demonstrated.Availability and implementation: R/Bioconductor package epigenomix. CONTACT: h.klein@uni-muenster.de SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.},
- author = {Klein, Hans-Ulrich and Sch{\"{a}}fer, Martin and Porse, Bo T. and Hasemann, Marie S. and Ickstadt, Katja and Dugas, Martin},
- doi = {10.1093/bioinformatics/btu003},
- file = {:Users/ryan/Documents/Mendeley Desktop/Klein et al. - 2014 - Integrative analysis of histone ChIP-seq and transcription data using Bayesian mixture models.pdf:pdf},
- isbn = {1367-4811 (Electronic); 1367-4803 (Linking)},
- issn = {1460-2059},
- journal = {Bioinformatics},
- month = {apr},
- number = {8},
- pages = {1154--1162},
- pmid = {24403540},
- title = {{Integrative analysis of histone ChIP-seq and transcription data using Bayesian mixture models}},
- url = {https://academic.oup.com/bioinformatics/article-lookup/doi/10.1093/bioinformatics/btu003},
- volume = {30},
- year = {2014}
- }
- @article{Kikutake2016,
- abstract = {Epigenetic mechanisms such as DNA methylation or histone modifications are essential for the regulation of gene expression and development of tissues. Alteration of epigenetic modifications can be used as an epigenetic biomarker for diagnosis and as promising targets for epigenetic therapy. A recent study explored cancer-cell specific epigenetic biomarkers by examining different types of epigenetic modifications simultaneously. However, it was based on microarrays and reported biomarkers that were also present in normal cells at a low frequency. Here, we first analyzed multi-omics data (including ChIP-Seq data of six types of histone modifications: H3K27ac, H3K4me1, H3K9me3, H3K36me3, H3K27me3, and H3K4me3) obtained from 26 lung adenocarcinoma cell lines and a normal cell line. We identified six genes with both H3K27ac and H3K4me3 histone modifications in their promoter regions, which were not present in the normal cell line, but present in ≥85{\%} (22 out of 26) and ≤96{\%} (25 out of 26) of the lung adenocarcinoma cell lines. Of these genes, NUP210 (encoding a main component of the nuclear pore complex) was the only gene in which the two modifications were not detected in another normal cell line. RNA-Seq analysis revealed that NUP210 was aberrantly overexpressed among the 26 lung adenocarcinoma cell lines, although the frequency of NUP210 overexpression was lower (19.3{\%}) in 57 lung adenocarcinoma tissue samples studied and stored in another database. This study provides a basis to discover epigenetic biomarkers highly specific to a certain cancer, based on multi-omics data at the cell population level.},
- author = {Kikutake, Chie and Yahara, Koji},
- doi = {10.1371/journal.pone.0152918},
- file = {:Users/ryan/Documents/Mendeley Desktop/Kikutake, Yahara - 2016 - Identification of epigenetic biomarkers of lung adenocarcinoma through multi-omics data analysis.pdf:pdf},
- issn = {1932-6203},
- journal = {PLoS ONE},
- number = {4},
- pages = {1--20},
- pmid = {27042856},
- title = {{Identification of epigenetic biomarkers of lung adenocarcinoma through multi-omics data analysis}},
- url = {http://journals.plos.org/plosone/article/file?id=10.1371/journal.pone.0152918{\&}type=printable},
- volume = {11},
- year = {2016}
- }
- @article{VanDeWiel2013,
- abstract = {Next generation sequencing is quickly replacing microarrays as a technique to probe different molecular levels of the cell, such as DNA or RNA. The technology provides higher resolution, while reducing bias. RNA sequencing results in counts of RNA strands. This type of data imposes new statistical challenges. We present a novel, generic approach to model and analyze such data. Our approach aims at large flexibility of the likelihood (count) model and the regression model alike. Hence, a variety of count models is supported, such as the popular NB model, which accounts for overdispersion. In addition, complex, non-balanced designs and random effects are accommodated. Like some other methods, our method provides shrinkage of dispersion-related parameters. However, we extend it by enabling joint shrinkage of parameters, including those for which inference is desired. We argue that this is essential for Bayesian multiplicity correction. Shrinkage is effectuated by empirically estimating priors. We discuss several parametric (mixture) and non-parametric priors and develop procedures to estimate (parameters of) those. Inference is provided by means of local and Bayesian false discovery rates. We illustrate our method on several simulations and two data sets, also to compare it with other methods. Model- and data-based simulations show substantial improvements in the sensitivity at the given specificity. The data motivate the use of the ZI-NB as a powerful alternative to the NB, which results in higher detection rates for low-count data. Finally, compared with other methods, the results on small sample subsets are more reproducible when validated on their large sample complements, illustrating the importance of the type of shrinkage.},
- author = {{Van De Wiel}, Mark A and Leday, Gwena{\"{e}}l G R and Pardo, Luba and Rue, H{\aa}vard and {Van Der Vaart}, Aad W and {Van Wieringen}, Wessel N},
- doi = {10.1093/biostatistics/kxs031},
- file = {:Users/ryan/Documents/Mendeley Desktop/Van De Wiel et al. - 2013 - Bayesian analysis of RNA sequencing data by estimating multiple shrinkage priors.pdf:pdf},
- issn = {1468-4357},
- journal = {Biostatistics (Oxford, England)},
- keywords = {Base Sequence,Bayes Theorem,Computer Simulation,Data Interpretation,Models,Molecular Sequence Data,RNA,RNA: chemistry,RNA: genetics,RNA: methods,Sequence Analysis,Statistical},
- month = {jan},
- number = {1},
- pages = {113--128},
- pmid = {22988280},
- title = {{Bayesian analysis of RNA sequencing data by estimating multiple shrinkage priors.}},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/22988280},
- volume = {14},
- year = {2013}
- }
- @article{Schmitz2012,
- abstract = {Burkitt's lymphoma (BL) can often be cured by intensive chemotherapy, but the toxicity of such therapy precludes its use in the elderly and in patients with endemic BL in developing countries, necessitating new strategies. The normal germinal centre B cell is the presumed cell of origin for both BL and diffuse large B-cell lymphoma (DLBCL), yet gene expression analysis suggests that these malignancies may use different oncogenic pathways. BL is subdivided into a sporadic subtype that is diagnosed in developed countries, the Epstein-Barr-virus-associated endemic subtype, and an HIV-associated subtype, but it is unclear whether these subtypes use similar or divergent oncogenic mechanisms. Here we used high-throughput RNA sequencing and RNA interference screening to discover essential regulatory pathways in BL that cooperate with MYC, the defining oncogene of this cancer. In 70{\%} of sporadic BL cases, mutations affecting the transcription factor TCF3 (E2A) or its negative regulator ID3 fostered TCF3 dependency. TCF3 activated the pro-survival phosphatidylinositol-3-OH kinase pathway in BL, in part by augmenting tonic B-cell receptor signalling. In 38{\%} of sporadic BL cases, oncogenic CCND3 mutations produced highly stable cyclin D3 isoforms that drive cell cycle progression. These findings suggest opportunities to improve therapy for patients with BL.},
- author = {Schmitz, Roland and Young, Ryan M and Ceribelli, Michele and Jhavar, Sameer and Xiao, Wenming and Zhang, Meili and Wright, George and Shaffer, Arthur L and Hodson, Daniel J and Buras, Eric and Liu, Xuelu and Powell, John and Yang, Yandan and Xu, Weihong and Zhao, Hong and Kohlhammer, Holger and Rosenwald, Andreas and Kluin, Philip and M{\"{u}}ller-Hermelink, Hans Konrad and Ott, German and Gascoyne, Randy D and Connors, Joseph M and Rimsza, Lisa M and Campo, Elias and Jaffe, Elaine S and Delabie, Jan and Smeland, Erlend B and Ogwang, Martin D and Reynolds, Steven J and Fisher, Richard I and Braziel, Rita M and Tubbs, Raymond R and Cook, James R and Weisenburger, Dennis D and Chan, Wing C and Pittaluga, Stefania and Wilson, Wyndham and Waldmann, Thomas A and Rowe, Martin and Mbulaiteye, Sam M and Rickinson, Alan B and Staudt, Louis M},
- doi = {10.1038/nature11378},
- file = {:Users/ryan/Documents/Mendeley Desktop/Schmitz et al. - 2012 - Burkitt lymphoma pathogenesis and therapeutic targets from structural and functional genomics.pdf:pdf},
- issn = {1476-4687},
- journal = {Nature},
- month = {aug},
- pages = {3--7},
- pmid = {22885699},
- title = {{Burkitt lymphoma pathogenesis and therapeutic targets from structural and functional genomics.}},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/22885699},
- year = {2012}
- }
- @article{Aryee2014,
- abstract = {MOTIVATION: The recently released Infinium HumanMethylation450 array (the '450k' array) provides a high-throughput assay to quantify DNA methylation (DNAm) at $\sim$450 000 loci across a range of genomic features. Although less comprehensive than high-throughput sequencing-based techniques, this product is more cost-effective and promises to be the most widely used DNAm high-throughput measurement technology over the next several years. RESULTS: Here we describe a suite of computational tools that incorporate state-of-the-art statistical techniques for the analysis of DNAm data. The software is structured to easily adapt to future versions of the technology. We include methods for preprocessing, quality assessment and detection of differentially methylated regions from the kilobase to the megabase scale. We show how our software provides a powerful and flexible development platform for future methods. We also illustrate how our methods empower the technology to make discoveries previously thought to be possible only with sequencing-based methods. AVAILABILITY AND IMPLEMENTATION: http://bioconductor.org/packages/release/bioc/html/minfi.html. CONTACT: khansen@jhsph.edu; rafa@jimmy.harvard.edu SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.},
- author = {Aryee, Martin J. and Jaffe, Andrew E. and Corrada-Bravo, Hector and Ladd-Acosta, Christine and Feinberg, Andrew P. and Hansen, Kasper D. and Irizarry, Rafael A.},
- doi = {10.1093/bioinformatics/btu049},
- file = {:Users/ryan/Documents/Mendeley Desktop/Aryee et al. - 2014 - Minfi A flexible and comprehensive Bioconductor package for the analysis of Infinium DNA methylation microarrays.pdf:pdf},
- issn = {1460-2059},
- journal = {Bioinformatics},
- month = may,
- number = {10},
- pages = {1363--1369},
- title = {{Minfi}: a flexible and comprehensive {Bioconductor} package for the analysis of {Infinium} {DNA} methylation microarrays},
- url = {https://academic.oup.com/bioinformatics/article-lookup/doi/10.1093/bioinformatics/btu049},
- volume = {30},
- year = {2014}
- }
- @article{Varemo2013,
- abstract = {Gene set analysis (GSA) is used to elucidate genome-wide data, in particular transcriptome data. A multitude of methods have been proposed for this step of the analysis, and many of them have been compared and evaluated. Unfortunately, there is no consolidated opinion regarding what methods should be preferred, and the variety of available GSA software and implementations pose a difficulty for the end-user who wants to try out different methods. To address this, we have developed the R package Piano that collects a range of GSA methods into the same system, for the benefit of the end-user. Further on we refine the GSA workflow by using modifications of the gene-level statistics. This enables us to divide the resulting gene set P-values into three classes, describing different aspects of gene expression directionality at gene set level. We use our fully implemented workflow to investigate the impact of the individual components of GSA by using microarray and RNA-seq data. The results show that the evaluated methods are globally similar and the major separation correlates well with our defined directionality classes. As a consequence of this, we suggest to use a consensus scoring approach, based on multiple GSA runs. In combination with the directionality classes, this constitutes a more thorough basis for an enriched biological interpretation.},
- author = {V{\"{a}}remo, Leif and Nielsen, Jens and Nookaew, Intawat},
- doi = {10.1093/nar/gkt111},
- file = {:Users/ryan/Documents/Mendeley Desktop/V{\"{a}}remo, Nielsen, Nookaew - 2013 - Enriching the gene set analysis of genome-wide data by incorporating directionality of gene expres(2).pdf:pdf;:Users/ryan/Documents/Mendeley Desktop/V{\"{a}}remo, Nielsen, Nookaew - 2013 - Enriching the gene set analysis of genome-wide data by incorporating directionality of gene expressio.pdf:pdf},
- issn = {1362-4962},
- journal = {Nucleic Acids Research},
- keywords = {Data Interpretation,GSEA,Gene Expression Profiling,Genes,Genomics,Genomics: methods,Principal Component Analysis,Saccharomyces cerevisiae,Saccharomyces cerevisiae: genetics,Saccharomyces cerevisiae: metabolism,Software,Statistical,piano},
- mendeley-tags = {GSEA,piano},
- month = apr,
- number = {8},
- pages = {4378--4391},
- pmid = {23444143},
- title = {Enriching the gene set analysis of genome-wide data by incorporating directionality of gene expression and combining statistical hypotheses and methods},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=3632109&tool=pmcentrez&rendertype=abstract},
- volume = {41},
- year = {2013}
- }
- @misc{Maindonald2008,
- author = {Maindonald, John H.},
- file = {:Users/ryan/Documents/Mendeley Desktop/Maindonald - 2008 - Using R for Data Analysis and Graphics Introduction , Code and Commentary.pdf:pdf},
- title = {Using {R} for Data Analysis and Graphics: Introduction, Code and Commentary},
- year = {2008}
- }
- @article{Du2010,
- abstract = {BACKGROUND: High-throughput profiling of DNA methylation status of CpG islands is crucial to understand the epigenetic regulation of genes. The microarray-based Infinium methylation assay by Illumina is one platform for low-cost high-throughput methylation profiling. Both Beta-value and M-value statistics have been used as metrics to measure methylation levels. However, there are no detailed studies of their relations and their strengths and limitations. RESULTS: We demonstrate that the relationship between the Beta-value and M-value methods is a Logit transformation, and show that the Beta-value method has severe heteroscedasticity for highly methylated or unmethylated CpG sites. In order to evaluate the performance of the Beta-value and M-value methods for identifying differentially methylated CpG sites, we designed a methylation titration experiment. The evaluation results show that the M-value method provides much better performance in terms of Detection Rate (DR) and True Positive Rate (TPR) for both highly methylated and unmethylated CpG sites. Imposing a minimum threshold of difference can improve the performance of the M-value method but not the Beta-value method. We also provide guidance for how to select the threshold of methylation differences. CONCLUSIONS: The Beta-value has a more intuitive biological interpretation, but the M-value is more statistically valid for the differential analysis of methylation levels. Therefore, we recommend using the M-value method for conducting differential methylation analysis and including the Beta-value statistics when reporting the results to investigators.},
- author = {Du, Pan and Zhang, Xiao and Huang, Chiang-Ching and Jafari, Nadereh and Kibbe, Warren A and Hou, Lifang and Lin, Simon M},
- doi = {10.1186/1471-2105-11-587},
- file = {:Users/ryan/Documents/Mendeley Desktop/Du et al. - 2010 - Comparison of Beta-value and M-value methods for quantifying methylation levels by microarray analysis.pdf:pdf},
- issn = {1471-2105},
- journal = {BMC Bioinformatics},
- keywords = {CpG Islands,DNA Methylation,Data Interpretation, Statistical,Microarray Analysis,Microarray Analysis: methods},
- number = {1},
- pages = {587},
- pmid = {21118553},
- title = {Comparison of {Beta-value} and {M-value} methods for quantifying methylation levels by microarray analysis},
- url = {http://www.biomedcentral.com/1471-2105/11/587},
- volume = {11},
- year = {2010}
- }
- @article{Robles2012,
- abstract = {BACKGROUND: RNA sequencing (RNA-Seq) has emerged as a powerful approach for the detection of differential gene expression with both high-throughput and high resolution capabilities possible depending upon the experimental design chosen. Multiplex experimental designs are now readily available, these can be utilised to increase the numbers of samples or replicates profiled at the cost of decreased sequencing depth generated per sample. These strategies impact on the power of the approach to accurately identify differential expression. This study presents a detailed analysis of the power to detect differential expression in a range of scenarios including simulated null and differential expression distributions with varying numbers of biological or technical replicates, sequencing depths and analysis methods. RESULTS: Differential and non-differential expression datasets were simulated using a combination of negative binomial and exponential distributions derived from real RNA-Seq data. These datasets were used to evaluate the performance of three commonly used differential expression analysis algorithms and to quantify the changes in power with respect to true and false positive rates when simulating variations in sequencing depth, biological replication and multiplex experimental design choices. CONCLUSIONS: This work quantitatively explores comparisons between contemporary analysis tools and experimental design choices for the detection of differential expression using RNA-Seq. We found that the DESeq algorithm performs more conservatively than edgeR and NBPSeq. With regard to testing of various experimental designs, this work strongly suggests that greater power is gained through the use of biological replicates relative to library (technical) replicates and sequencing depth. Strikingly, sequencing depth could be reduced as low as 15{\%} without substantial impacts on false positive or true positive rates.},
- author = {Robles, Jos{\'{e}} A and Qureshi, Sumaira E and Stephen, Stuart J and Wilson, Susan R and Burden, Conrad J and Taylor, Jennifer M},
- doi = {10.1186/1471-2164-13-484},
- file = {:Users/ryan/Documents/Mendeley Desktop/Robles et al. - 2012 - Efficient experimental design and analysis strategies for the detection of differential expression using RNA-S(2).pdf:pdf},
- issn = {1471-2164},
- journal = {BMC Genomics},
- keywords = {Algorithms,Gene Expression Profiling,Gene Expression Profiling: methods,RNA,RNA: methods,Sequence Analysis,Statistics as Topic,Statistics as Topic: methods},
- month = jan,
- number = {1},
- pages = {484},
- pmid = {22985019},
- title = {Efficient experimental design and analysis strategies for the detection of differential expression using {RNA-Sequencing}},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/22985019},
- volume = {13},
- year = {2012}
- }
- @article{Gevaert2015a,
- abstract = {Aberrant DNA methylation is an important mechanism that contributes to oncogenesis. Yet, few algorithms exist that exploit this vast dataset to identify hypo- and hypermethylated genes in cancer. We developed a novel computational algorithm called MethylMix to identify differentially methylated genes that are also predictive of transcription. We apply MethylMix to 12 individual cancer sites, and additionally combine all cancer sites in a pancancer analysis. We discover pancancer hypo- and hypermethylated genes and identify novel methylation-driven subgroups with clinical implications. MethylMix analysis on combined cancer sites reveals 10 pancancer clusters reflecting new similarities across malignantly transformed tissues.},
- author = {Gevaert, Olivier and Tibshirani, Robert and Plevritis, Sylvia K},
- doi = {10.1186/s13059-014-0579-8},
- file = {:Users/ryan/Documents/Mendeley Desktop/Gevaert, Tibshirani, Plevritis - 2015 - Pancancer analysis of DNA methylation-driven genes using MethylMix.pdf:pdf},
- issn = {1474-760X},
- journal = {Genome Biology},
- number = {1},
- pages = {17},
- pmid = {25631659},
- title = {Pancancer analysis of {DNA} methylation-driven genes using {MethylMix}},
- url = {http://genomebiology.com/2015/16/1/17},
- volume = {16},
- year = {2015}
- }
- @article{Sammeth2009,
- abstract = {Eukaryotic splicing structures are known to involve a high degree of alternative forms derived from a premature transcript by alternative splicing (AS). With the advent of new sequencing technologies, evidence for new splice forms becomes increasingly available-bit by bit revealing that the true splicing diversity of "AS events" often comprises more than two alternatives and therefore cannot be sufficiently described by pairwise comparisons as conducted in analyzes hitherto. Especially, I emphasize on "complete" AS events which include all hitherto known variants of a splicing variation. Challenges emerge from the richness of data (millions of transcripts) and artifacts introduced during the technical process of obtaining transcript sequences ("noise")-especially when dealing with single-read sequences known as expressed sequence tags (ESTs). Herein, I describe a novel method to efficiently predict AS events in different resolutions ("dimensions") from transcript annotations that allows for combination of fragmented EST data with full-length cDNAs and can cope with large datasets containing noise. At the doorstep of many new splice forms becoming available by novel high-throughput sequencing technologies, the presented method helps to dynamically update AS databases. Applying this method to estimate the real complexity of alternative splicing, I found in human and murine annotations thousands of novel AS events that either have been disregarded or mischaracterized in earlier works. The growth of evidence for such events suggests that the number still keeps climbing. When considering complete events, the majority of exons that are observed as "mutually exclusive" in pairwise comparisons in fact involves at least one other alternative splice form that disagrees with their mutual exclusion. Similar observations also hold for the alternative skipping of two subsequent exons. Results suggest that the systematical analysis of complete AS events on large scale provides subtle insights in the mechanisms that drive (alternative) splicing.},
- author = {Sammeth, Michael},
- doi = {10.1089/cmb.2009.0108},
- file = {:Users/ryan/Documents/Mendeley Desktop/Sammeth - 2009 - Complete alternative splicing events are bubbles in splicing graphs.pdf:pdf},
- issn = {1557-8666},
- journal = {Journal of Computational Biology},
- keywords = {Alternative Splicing,Animals,Exons,Genomics,Genomics: methods,Humans,Introns,Mice,Models, Genetic,RNA,RNA: genetics},
- month = aug,
- number = {8},
- pages = {1117--1140},
- pmid = {19689216},
- title = {Complete alternative splicing events are bubbles in splicing graphs},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/19689216},
- volume = {16},
- year = {2009}
- }
- @article{Jacob2010a,
- abstract = {We consider multivariate two-sample tests of means, where the location shift between the two populations is expected to be related to a known graph structure. An important application of such tests is the detection of differentially expressed genes between two patient populations, as shifts in expression levels are expected to be coherent with the structure of graphs reflecting gene properties such as biological process, molecular function, regulation, or metabolism. For a fixed graph of interest, we demonstrate that accounting for graph structure can yield more powerful tests under the assumption of smooth distribution shift on the graph. We also investigate the identification of non-homogeneous subgraphs of a given large graph, which poses both computational and multiple testing problems. The relevance and benefits of the proposed approach are illustrated on synthetic data and on breast cancer gene expression data analyzed in context of KEGG pathways.},
- archivePrefix = {arXiv},
- arxivId = {1009.5173},
- author = {Jacob, Laurent and Neuvial, Pierre and Dudoit, Sandrine},
- eprint = {1009.5173},
- file = {:Users/ryan/Documents/Mendeley Desktop/Jacob, Neuvial, Dudoit - 2010 - Gains in Power from Structured Two-Sample Tests of Means on Graphs.pdf:pdf},
- month = sep,
- pages = {1--24},
- title = {Gains in Power from Structured Two-Sample Tests of Means on Graphs},
- url = {http://arxiv.org/abs/1009.5173},
- year = {2010}
- }
- @article{Huang2012,
- abstract = {The circadian clock in mammals is driven by an autoregulatory transcriptional feedback mechanism that takes approximately 24 hours to complete. A key component of this mechanism is a heterodimeric transcriptional activator consisting of two basic helix-loop-helix PER-ARNT-SIM (bHLH-PAS) domain protein subunits, CLOCK and BMAL1. Here, we report the crystal structure of a complex containing the mouse CLOCK:BMAL1 bHLH-PAS domains at 2.3 {\AA} resolution. The structure reveals an unusual asymmetric heterodimer with the three domains in each of the two subunits--bHLH, PAS-A, and PAS-B--tightly intertwined and involved in dimerization interactions, resulting in three distinct protein interfaces. Mutations that perturb the observed heterodimer interfaces affect the stability and activity of the CLOCK:BMAL1 complex as well as the periodicity of the circadian oscillator. The structure of the CLOCK:BMAL1 complex is a starting point for understanding at an atomic level the mechanism driving the mammalian circadian clock.},
- author = {Huang, Nian and Chelliah, Yogarany and Shan, Yongli and Taylor, Clinton A and Yoo, Seung-Hee and Partch, Carrie and Green, Carla B and Zhang, Hong and Takahashi, Joseph S},
- doi = {10.1126/science.1222804},
- file = {:Users/ryan/Documents/Mendeley Desktop/Huang et al. - 2012 - Crystal structure of the heterodimeric CLOCKBMAL1 transcriptional activator complex.pdf:pdf},
- issn = {1095-9203},
- journal = {Science},
- keywords = {ARNTL Transcription Factors,ARNTL Transcription Factors: chemistry,ARNTL Transcription Factors: genetics,ARNTL Transcription Factors: metabolism,Amino Acid Sequence,Animals,CLOCK Proteins,CLOCK Proteins: chemistry,CLOCK Proteins: genetics,CLOCK Proteins: metabolism,Cells, Cultured,Circadian Rhythm,Crystallography, X-Ray,DNA,DNA: metabolism,HEK293 Cells,Helix-Loop-Helix Motifs,Humans,Mice,Models, Molecular,Molecular Sequence Data,Mutant Proteins,Mutant Proteins: chemistry,Mutant Proteins: metabolism,Protein Binding,Protein Interaction Domains and Motifs,Protein Multimerization,Protein Structure, Quaternary,Protein Structure, Secondary,Protein Structure, Tertiary,Protein Subunits,Protein Subunits: chemistry,Protein Subunits: metabolism,Static Electricity,Transcriptional Activation},
- month = jul,
- number = {6091},
- pages = {189--194},
- pmid = {22653727},
- title = {Crystal structure of the heterodimeric {CLOCK:BMAL1} transcriptional activator complex},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/22653727},
- volume = {337},
- year = {2012}
- }
- @article{Hu2013,
- abstract = {The RNA transcriptome varies in response to cellular differentiation as well as environmental factors, and can be characterized by the diversity and abundance of transcript isoforms. Differential transcription analysis, the detection of differences between the transcriptomes of different cells, may improve understanding of cell differentiation and development and enable the identification of biomarkers that classify disease types. The availability of high-throughput short-read RNA sequencing technologies provides in-depth sampling of the transcriptome, making it possible to accurately detect the differences between transcriptomes. In this article, we present a new method for the detection and visualization of differential transcription. Our approach does not depend on transcript or gene annotations. It also circumvents the need for full transcript inference and quantification, which is a challenging problem because of short read lengths, as well as various sampling biases. Instead, our method takes a divide-and-conquer approach to localize the difference between transcriptomes in the form of alternative splicing modules (ASMs), where transcript isoforms diverge. Our approach starts with the identification of ASMs from the splice graph, constructed directly from the exons and introns predicted from RNA-seq read alignments. The abundance of alternative splicing isoforms residing in each ASM is estimated for each sample and is compared across sample groups. A non-parametric statistical test is applied to each ASM to detect significant differential transcription with a controlled false discovery rate. The sensitivity and specificity of the method have been assessed using simulated data sets and compared with other state-of-the-art approaches. Experimental validation using qRT-PCR confirmed a selected set of genes that are differentially expressed in a lung differentiation study and a breast cancer data set, demonstrating the utility of the approach applied on experimental biological data sets. The software of DiffSplice is available at http://www.netlab.uky.edu/p/bioinfo/DiffSplice.},
- author = {Hu, Yin and Huang, Yan and Du, Ying and Orellana, Christian F and Singh, Darshan and Johnson, Amy R and Monroy, Ana{\"{\i}}s and Kuan, Pei-Fen and Hammond, Scott M and Makowski, Liza and Randell, Scott H and Chiang, Derek Y and Hayes, D Neil and Jones, Corbin and Liu, Yufeng and Prins, Jan F and Liu, Jinze},
- doi = {10.1093/nar/gks1026},
- file = {:Users/ryan/Documents/Mendeley Desktop/Hu et al. - 2013 - DiffSplice the genome-wide detection of differential splicing events with RNA-seq.pdf:pdf},
- issn = {1362-4962},
- journal = {Nucleic Acids Research},
- month = jan,
- number = {2},
- pages = {e39},
- pmid = {23155066},
- title = {{DiffSplice}: the genome-wide detection of differential splicing events with {RNA-seq}},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=3553996&tool=pmcentrez&rendertype=abstract},
- volume = {41},
- year = {2013}
- }
- @article{Benjamini1995,
- author = {Benjamini, Yoav and Hochberg, Yosef},
- journal = {Journal of the Royal Statistical Society. Series B (Methodological)},
- number = {1},
- pages = {289--300},
- title = {Controlling the false discovery rate: a practical and powerful approach to multiple testing},
- url = {https://www.jstor.org/stable/2346101},
- volume = {57},
- year = {1995}
- }
- @article{Srivastava2010,
- abstract = {Deep sequencing of RNAs (RNA-seq) has been a useful tool to characterize and quantify transcriptomes. However, there are significant challenges in the analysis of RNA-seq data, such as how to separate signals from sequencing bias and how to perform reasonable normalization. Here, we focus on a fundamental question in RNA-seq analysis: the distribution of the position-level read counts. Specifically, we propose a two-parameter generalized Poisson (GP) model to the position-level read counts. We show that the GP model fits the data much better than the traditional Poisson model. Based on the GP model, we can better estimate gene or exon expression, perform a more reasonable normalization across different samples, and improve the identification of differentially expressed genes and the identification of differentially spliced exons. The usefulness of the GP model is demonstrated by applications to multiple RNA-seq data sets.},
- author = {Srivastava, Sudeep and Chen, Liang},
- doi = {10.1093/nar/gkq670},
- file = {:Users/ryan/Documents/Mendeley Desktop/Srivastava, Chen - 2010 - A two-parameter generalized Poisson model to improve the analysis of RNA-seq data.pdf:pdf},
- issn = {1362-4962},
- journal = {Nucleic Acids Research},
- keywords = {Animals,Exons,Gene Expression Profiling,Gene Expression Profiling: methods,Humans,Mice,Models, Statistical,Poisson Distribution,RNA Splicing,Sequence Analysis, RNA,Sequence Analysis, RNA: methods},
- month = sep,
- number = {17},
- pages = {e170},
- pmid = {20671027},
- title = {A two-parameter generalized {Poisson} model to improve the analysis of {RNA-seq} data},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=2943596&tool=pmcentrez&rendertype=abstract},
- volume = {38},
- year = {2010}
- }
- @article{Liang2008,
- author = {Liang, Kun},
- file = {:Users/ryan/Documents/Mendeley Desktop/Liang - 2008 - Further Details of DBChIP.pdf:pdf},
- number = {1},
- pages = {1--10},
- title = {Further Details of {DBChIP}},
- year = {2008}
- }
- @article{Wang2012,
- abstract = {MCScan is an algorithm able to scan multiple genomes or subgenomes in order to identify putative homologous chromosomal regions, and align these regions using genes as anchors. The MCScanX toolkit implements an adjusted MCScan algorithm for detection of synteny and collinearity that extends the original software by incorporating 14 utility programs for visualization of results and additional downstream analyses. Applications of MCScanX to several sequenced plant genomes and gene families are shown as examples. MCScanX can be used to effectively analyze chromosome structural changes, and reveal the history of gene family expansions that might contribute to the adaptation of lineages and taxa. An integrated view of various modes of gene duplication can supplement the traditional gene tree analysis in specific families. The source code and documentation of MCScanX are freely available at http://chibba.pgml.uga.edu/mcscan2/.},
- author = {Wang, Yupeng and Tang, Haibao and DeBarry, Jeremy D and Tan, Xu and Li, Jingping and Wang, Xiyin and Lee, Tae-ho and Jin, Huizhe and Marler, Barry and Guo, Hui and Kissinger, Jessica C and Paterson, Andrew H},
- doi = {10.1093/nar/gkr1293},
- file = {:Users/ryan/Documents/Mendeley Desktop/Wang et al. - 2012 - MCScanX a toolkit for detection and evolutionary analysis of gene synteny and collinearity.pdf:pdf},
- issn = {1362-4962},
- journal = {Nucleic Acids Research},
- keywords = {Algorithms,Angiosperms,Angiosperms: genetics,Evolution, Molecular,Gene Duplication,Gene Order,Genome, Plant,Genomics,Multigene Family,Software,Synteny},
- month = apr,
- number = {7},
- pages = {e49},
- pmid = {22217600},
- title = {{MCScanX}: a toolkit for detection and evolutionary analysis of gene synteny and collinearity},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=3326336&tool=pmcentrez&rendertype=abstract},
- volume = {40},
- year = {2012}
- }
- @article{Hashimoto2014,
- author = {Hashimoto, Tatsunori B. and Edwards, Matthew D. and Gifford, David K.},
- doi = {10.1371/journal.pcbi.1003494},
- editor = {McHardy, Alice Carolyn},
- file = {:Users/ryan/Documents/Mendeley Desktop/Hashimoto, Edwards, Gifford - 2014 - Universal Count Correction for High-Throughput Sequencing.pdf:pdf},
- issn = {1553-7358},
- journal = {PLoS Computational Biology},
- month = mar,
- number = {3},
- pages = {e1003494},
- title = {Universal Count Correction for High-Throughput Sequencing},
- url = {http://dx.plos.org/10.1371/journal.pcbi.1003494},
- volume = {10},
- year = {2014}
- }
- @article{Ignatiadis2016,
- abstract = {Hypothesis weighting improves the power of large-scale multiple testing. We describe independent hypothesis weighting (IHW), a method that assigns weights using covariates independent of the P-values under the null hypothesis but informative of each test's power or prior probability of the null hypothesis (http://www.bioconductor.org/packages/IHW). IHW increases power while controlling the false discovery rate and is a practical approach to discovering associations in genomics, high-throughput biology and other large data sets.},
- author = {Ignatiadis, Nikolaos and Klaus, Bernd and Zaugg, Judith B. and Huber, Wolfgang},
- doi = {10.1038/nmeth.3885},
- file = {:Users/ryan/Documents/Mendeley Desktop/Ignatiadis et al. - 2016 - Data-driven hypothesis weighting increases detection power in genome-scale multiple testing.pdf:pdf},
- issn = {1548-7091},
- journal = {Nature Methods},
- month = jul,
- number = {7},
- pages = {577--580},
- pmid = {27240256},
- title = {Data-driven hypothesis weighting increases detection power in genome-scale multiple testing},
- url = {http://www.nature.com/articles/nmeth.3885},
- volume = {13},
- year = {2016}
- }
- @article{Stegle2012,
- abstract = {We present PEER (probabilistic estimation of expression residuals), a software package implementing statistical models that improve the sensitivity and interpretability of genetic associations in population-scale expression data. This approach builds on factor analysis methods that infer broad variance components in the measurements. PEER takes as input transcript profiles and covariates from a set of individuals, and then outputs hidden factors that explain much of the expression variability. Optionally, these factors can be interpreted as pathway or transcription factor activations by providing prior information about which genes are involved in the pathway or targeted by the factor. The inferred factors are used in genetic association analyses. First, they are treated as additional covariates, and are included in the model to increase detection power for mapping expression traits. Second, they are analyzed as phenotypes themselves to understand the causes of global expression variability. PEER extends previous related surrogate variable models and can be implemented within hours on a desktop computer.},
- author = {Stegle, Oliver and Parts, Leopold and Piipari, Matias and Winn, John and Durbin, Richard},
- doi = {10.1038/nprot.2011.457},
- file = {:Users/ryan/Documents/Mendeley Desktop/Stegle et al. - 2012 - Using probabilistic estimation of expression residuals (PEER) to obtain increased power and interpretability of g.pdf:pdf},
- issn = {1750-2799},
- journal = {Nature Protocols},
- keywords = {Algorithms,Factor Analysis, Statistical,Gene Expression Profiling,Gene Expression Profiling: methods,Gene Expression Profiling: statistics {\&} numerical,Genetic Association Studies,Genetic Association Studies: methods,Models, Statistical,Sensitivity and Specificity,Software},
- month = mar,
- number = {3},
- pages = {500--507},
- pmid = {22343431},
- title = {Using probabilistic estimation of expression residuals ({PEER}) to obtain increased power and interpretability of gene expression analyses},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=3398141&tool=pmcentrez&rendertype=abstract},
- volume = {7},
- year = {2012}
- }
- @article{Williamson2011,
- abstract = {Processive reactions, such as transcription or translation, often proceed through distinct initiation and elongation phases. The processive formation of polymeric ubiquitin chains can accordingly be catalyzed by specialized initiating and elongating E2 enzymes, but the functional significance for this division of labor has remained unclear. Here, we have identified sequence motifs in several substrates of the anaphase-promoting complex (APC/C) that are required for efficient chain initiation by its E2 Ube2C. Differences in the quality and accessibility of these chain initiation motifs can determine the rate of a substrate's degradation without affecting its affinity for the APC/C, a mechanism used by the APC/C to control the timing of substrate proteolysis during the cell cycle. Based on our results, we propose that initiation motifs and their cognate E2s allow E3 enzymes to exert precise temporal control over substrate degradation.},
- author = {Williamson, Adam and Banerjee, Sudeep and Zhu, Xining and Philipp, Isabelle and Iavarone, Anthony T and Rape, Michael},
- doi = {10.1016/j.molcel.2011.04.022},
- file = {:Users/ryan/Documents/Mendeley Desktop/Williamson et al. - 2011 - Regulation of ubiquitin chain initiation to control the timing of substrate degradation.pdf:pdf},
- issn = {1097-4164},
- journal = {Molecular Cell},
- keywords = {HEK293 Cells,Humans,Time Factors,Ubiquitin,Ubiquitin-Conjugating Enzymes,Ubiquitin-Conjugating Enzymes: metabolism,Ubiquitin-Protein Ligase Complexes,Ubiquitin-Protein Ligase Complexes: metabolism,Ubiquitin-Protein Ligases,Ubiquitin-Protein Ligases: metabolism,Ubiquitin: metabolism},
- month = jun,
- number = {6},
- pages = {744--757},
- pmid = {21700221},
- publisher = {Elsevier Inc.},
- title = {Regulation of ubiquitin chain initiation to control the timing of substrate degradation},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=3125540&tool=pmcentrez&rendertype=abstract},
- volume = {42},
- year = {2011}
- }
- @article{Bailey2013,
- abstract = {Mapping the chromosomal locations of transcription factors, nucleosomes, histone modifications, chromatin remodeling enzymes, chaperones, and polymerases is one of the key tasks of modern biology, as evidenced by the Encyclopedia of DNA Elements (ENCODE) Project. To this end, chromatin immunoprecipitation followed by high-throughput sequencing (ChIP-seq) is the standard methodology. Mapping such protein-DNA interactions in vivo using ChIP-seq presents multiple challenges not only in sample preparation and sequencing but also for computational analysis. Here, we present step-by-step guidelines for the computational analysis of ChIP-seq data. We address all the major steps in the analysis of ChIP-seq data: sequencing depth selection, quality checking, mapping, data normalization, assessment of reproducibility, peak calling, differential binding analysis, controlling the false discovery rate, peak annotation, visualization, and motif analysis. At each step in our guidelines we discuss some of the software tools most frequently used. We also highlight the challenges and problems associated with each step in ChIP-seq data analysis. We present a concise workflow for the analysis of ChIP-seq data in Figure 1 that complements and expands on the recommendations of the ENCODE and modENCODE projects. Each step in the workflow is described in detail in the following sections.},
- author = {Bailey, Timothy and Krajewski, Pawel and Ladunga, Istvan and Lefebvre, Celine and Li, Qunhua and Liu, Tao and Madrigal, Pedro and Taslim, Cenny and Zhang, Jie},
- doi = {10.1371/journal.pcbi.1003326},
- file = {:Users/ryan/Documents/Mendeley Desktop/Bailey et al. - 2013 - Practical guidelines for the comprehensive analysis of ChIP-seq data.pdf:pdf},
- issn = {1553-7358},
- journal = {PLoS Computational Biology},
- month = nov,
- number = {11},
- pages = {e1003326},
- pmid = {24244136},
- title = {Practical guidelines for the comprehensive analysis of {ChIP-seq} data},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=3828144{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {9},
- year = {2013}
- }
- @article{Wang2016,
- author = {Wang, Liguo and Nie, Jinfu and Sicotte, Hugues and Li, Ying and Eckel-Passow, Jeanette E. and Dasari, Surendra and Vedell, Peter T. and Barman, Poulami and Wang, Liewei and Weinshiboum, Richard and Jen, Jin and Huang, Haojie and Kohli, Manish and Kocher, Jean-Pierre A.},
- doi = {10.1186/s12859-016-0922-z},
- file = {:Users/ryan/Documents/Mendeley Desktop/Wang et al. - 2016 - Measure transcript integrity using RNA-seq data.pdf:pdf},
- issn = {1471-2105},
- journal = {BMC Bioinformatics},
- keywords = {gene expression,RNA-seq quality control,TIN,transcript integrity number},
- number = {1},
- pages = {58},
- publisher = {BioMed Central},
- title = {Measure transcript integrity using {RNA-seq} data},
- url = {http://www.biomedcentral.com/1471-2105/17/58},
- volume = {17},
- year = {2016}
- }
- @article{Chabbert2015,
- author = {Chabbert, Christophe D and Adjalley, Sophie H and Klaus, Bernd and Fritsch, Emilie S and Gupta, Ishaan and Pelechano, Vicent and Steinmetz, Lars M},
- doi = {10.15252/msb.20145776},
- file = {:Users/ryan/Documents/Mendeley Desktop/Chabbert et al. - 2015 - A high-throughput ChIP-Seq for large-scale chromatin studies.pdf:pdf},
- issn = {1744-4292},
- journal = {Molecular Systems Biology},
- keywords = {chip-seq,chromatin,high-throughput,histone,histone marks},
- number = {1},
- pages = {777},
- pmid = {25583149},
- title = {A high-throughput {ChIP-Seq} for large-scale chromatin studies},
- url = {http://msb.embopress.org/content/msb/11/1/777.full.pdf},
- volume = {11},
- year = {2015}
- }
- @article{Smilde2005,
- abstract = {MOTIVATION: Datasets resulting from metabolomics or metabolic profiling experiments are becoming increasingly complex. Such datasets may contain underlying factors, such as time (time-resolved or longitudinal measurements), doses or combinations thereof. Currently used biostatistics methods do not take the structure of such complex datasets into account. However, incorporating this structure into the data analysis is important for understanding the biological information in these datasets. RESULTS: We describe ASCA, a new method that can deal with complex multivariate datasets containing an underlying experimental design, such as metabolomics datasets. It is a direct generalization of analysis of variance (ANOVA) for univariate data to the multivariate case. The method allows for easy interpretation of the variation induced by the different factors of the design. The method is illustrated with a dataset from a metabolomics experiment with time and dose factors.},
- author = {Smilde, Age K and Jansen, Jeroen J and Hoefsloot, Huub C J and Lamers, Robert-Jan A N and van der Greef, Jan and Timmerman, Marieke E},
- doi = {10.1093/bioinformatics/bti476},
- file = {:Users/ryan/Documents/Mendeley Desktop/Smilde et al. - 2005 - ANOVA-simultaneous component analysis (ASCA) a new tool for analyzing designed metabolomics data.pdf:pdf},
- issn = {1367-4803},
- journal = {Bioinformatics},
- keywords = {Algorithms,Analysis of Variance,Animals,Ascorbic Acid,Ascorbic Acid: therapeutic use,Biological Markers,Biological Markers: urine,Computer Simulation,Dose-Response Relationship, Drug,Energy Metabolism,Gene Expression Profiling,Gene Expression Profiling: methods,Guinea Pigs,Male,Models, Biological,Models, Statistical,Multivariate Analysis,Osteoarthritis,Osteoarthritis: diagnosis,Osteoarthritis: drug therapy,Osteoarthritis: urine,Proteome,Proteome: metabolism,Software,Treatment Outcome},
- month = jul,
- number = {13},
- pages = {3043--3048},
- pmid = {15890747},
- title = {{ANOVA}-simultaneous component analysis ({ASCA}): a new tool for analyzing designed metabolomics data},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/15890747},
- volume = {21},
- year = {2005}
- }
- @article{Irizarry2003,
- author = {Irizarry, Rafael A. and Bolstad, Benjamin M. and Collin, Francois and Cope, Leslie M. and Hobbs, Bridget and Speed, Terence P.},
- doi = {10.1093/nar/gng015},
- file = {:Users/ryan/Documents/Mendeley Desktop/Irizarry - 2003 - Summaries of Affymetrix GeneChip probe level data.pdf:pdf},
- issn = {1362-4962},
- journal = {Nucleic Acids Research},
- month = feb,
- number = {4},
- pages = {e15},
- title = {Summaries of {Affymetrix} {GeneChip} probe level data},
- url = {http://www.nar.oupjournals.org/cgi/doi/10.1093/nar/gng015},
- volume = {31},
- year = {2003}
- }
- @article{Feng2011,
- abstract = {Model-based Analysis of ChIP-Seq (MACS) is a command-line tool designed by X. Shirley Liu and colleagues to analyze data generated by ChIP-Seq experiments in eukaryotes, especially mammals. MACS can be used to identify transcription factor binding sites and histone modification-enriched regions if the ChIP-Seq data, with or without control samples, are given. This unit describes two basic protocols that provide detailed information on how to use MACS to identify either the binding sites of a transcription factor or the enriched regions of a histone modification with broad peaks. Furthermore, the basic ideas for the MACS algorithm and its appropriate usage are discussed.},
- author = {Feng, Jianxing and Liu, Tao and Zhang, Yong},
- doi = {10.1002/0471250953.bi0214s34},
- file = {:Users/ryan/Documents/Mendeley Desktop/Feng, Liu, Zhang - 2011 - Using MACS to identify peaks from ChIP-Seq data.pdf:pdf},
- isbn = {0471250953},
- issn = {1934-340X},
- journal = {Current Protocols in Bioinformatics},
- keywords = {Algorithms,Base Sequence,Binding Sites,Chromatin Immunoprecipitation,Chromatin Immunoprecipitation: methods,Computational Biology,Computational Biology: methods,DNA,Databases, Factual,Sequence Analysis,Transcription Factors,Transcription Factors: metabolism,tutorial},
- mendeley-tags = {tutorial},
- month = jun,
- pages = {Unit 2.14},
- pmid = {21633945},
- title = {Using {MACS} to identify peaks from {ChIP-Seq} data},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=3120977{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {Chapter 2},
- year = {2011}
- }
- @article{Mo2012,
- abstract = {Chromatin immunoprecipitation followed by next generation sequencing (ChIP-seq) is a powerful technique that is being used in a wide range of biological studies including genome-wide measurements of protein-DNA interactions, DNA methylation, and histone modifications. The vast amount of data and biases introduced by sequencing and/or genome mapping pose new challenges and call for effective methods and fast computer programs for statistical analysis. To systematically model ChIP-seq data, we build a dynamic signal profile for each chromosome and then model the profile using a fully Bayesian hidden Ising model. The proposed model naturally takes into account spatial dependency and global and local distributions of sequence tags. It can be used for one-sample and two-sample analyses. Through model diagnosis, the proposed method can detect falsely enriched regions caused by sequencing and/or mapping errors, which is usually not offered by the existing hypothesis-testing-based methods. The proposed method is illustrated using 3 transcription factor (TF) ChIP-seq data sets and 2 mixed ChIP-seq data sets and compared with 4 popular and/or well-documented methods: MACS, CisGenome, BayesPeak, and SISSRs. The results indicate that the proposed method achieves equivalent or higher sensitivity and spatial resolution in detecting TF binding sites with false discovery rate at a much lower level.},
- author = {Mo, Qianxing},
- doi = {10.1093/biostatistics/kxr029},
- file = {:Users/ryan/Documents/Mendeley Desktop/Mo - 2012 - A fully Bayesian hidden Ising model for ChIP-seq data analysis.pdf:pdf},
- issn = {1468-4357},
- journal = {Biostatistics},
- keywords = {chip-seq,ising model,markov random fields,massively parallel sequencing,next generation},
- month = jan,
- number = {1},
- pages = {113--128},
- pmid = {21914728},
- title = {A fully {Bayesian} hidden {Ising} model for {ChIP-seq} data analysis},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/21914728},
- volume = {13},
- year = {2012}
- }
- % Smyth2005 is a chapter of the Springer book identified by the ISBN and DOI below, so it is typed as incollection rather than as a journal article.
- @incollection{Smyth2005,
- abstract = {A survey is given of differential expression analyses using the linear modeling features of the limma package. The chapter starts with the simplest replicated designs and progresses through experiments with two or more groups, direct designs, factorial designs and time course experiments. Experiments with technical as well as biological replication are considered. Empirical Bayes test statistics are explained. The use of quality weights, adaptive background correction and control spots in conjunction with linear modelling is illustrated on the $\beta$7 data.},
- address = {New York},
- author = {Smyth, Gordon K},
- booktitle = {Bioinformatics and Computational Biology Solutions Using {R} and {Bioconductor}},
- doi = {10.1007/0-387-29362-0_23},
- editor = {Gentleman, Robert and Carey, Vincent J. and Huber, Wolfgang and Irizarry, Rafael A. and Dudoit, Sandrine},
- isbn = {9780387251462},
- keywords = {limma,microarray},
- mendeley-tags = {limma,microarray},
- pages = {397--420},
- pmid = {16495579},
- publisher = {Springer},
- title = {Limma: Linear Models for Microarray Data},
- year = {2005}
- }
- @article{Tsutakawa2011,
- abstract = {Flap endonuclease (FEN1), essential for DNA replication and repair, removes RNA and DNA 5' flaps. FEN1 5' nuclease superfamily members acting in nucleotide excision repair (XPG), mismatch repair (EXO1), and homologous recombination (GEN1) paradoxically incise structurally distinct bubbles, ends, or Holliday junctions, respectively. Here, structural and functional analyses of human FEN1:DNA complexes show structure-specific, sequence-independent recognition for nicked dsDNA bent 100{\textdegree} with unpaired 3' and 5' flaps. Above the active site, a helical cap over a gateway formed by two helices enforces ssDNA threading and specificity for free 5' ends. Crystallographic analyses of product and substrate complexes reveal that dsDNA binding and bending, the ssDNA gateway, and double-base unpairing flanking the scissile phosphate control precise flap incision by the two-metal-ion active site. Superfamily conserved motifs bind and open dsDNA; direct the target region into the helical gateway, permitting only nonbase-paired oligonucleotides active site access; and support a unified understanding of superfamily substrate specificity.},
- author = {Tsutakawa, Susan E and Classen, Scott and Chapados, Brian R and Arvai, Andrew S and Finger, L David and Guenther, Grant and Tomlinson, Christopher G and Thompson, Peter and Sarker, Altaf H and Shen, Binghui and Cooper, Priscilla K and Grasby, Jane A and Tainer, John A},
- doi = {10.1016/j.cell.2011.03.004},
- file = {:Users/ryan/Documents/Mendeley Desktop/Tsutakawa et al. - 2011 - Human flap endonuclease structures, DNA double-base flipping, and a unified understanding of the FEN1 superfam.pdf:pdf},
- issn = {1097-4172},
- journal = {Cell},
- keywords = {Amino Acid Sequence,Catalytic Domain,DNA,DNA Mutational Analysis,DNA: metabolism,Exodeoxyribonucleases,Exodeoxyribonucleases: chemistry,Exodeoxyribonucleases: metabolism,Flap Endonucleases,Flap Endonucleases: chemistry,Flap Endonucleases: metabolism,Humans,Models, Molecular,Molecular Sequence Data,Sequence Alignment,Substrate Specificity},
- month = apr,
- number = {2},
- pages = {198--211},
- pmid = {21496641},
- publisher = {Elsevier Inc.},
- title = {Human flap endonuclease structures, {DNA} double-base flipping, and a unified understanding of the {FEN1} superfamily},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=3086263{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {145},
- year = {2011}
- }
- @article{Grossmann2007a,
- abstract = {MOTIVATION: High-throughput experiments such as microarray hybridizations often yield long lists of genes found to share a certain characteristic such as differential expression. Exploring Gene Ontology (GO) annotations for such lists of genes has become a widespread practice to get first insights into the potential biological meaning of the experiment. The standard statistical approach to measuring overrepresentation of GO terms cannot cope with the dependencies resulting from the structure of GO because they analyze each term in isolation. Especially the fact that annotations are inherited from more specific descendant terms can result in certain types of false-positive results with potentially misleading biological interpretation, a phenomenon which we term the inheritance problem.
- RESULTS: We present here a novel approach to analysis of GO term overrepresentation that determines overrepresentation of terms in the context of annotations to the term's parents. This approach reduces the dependencies between the individual term's measurements, and thereby avoids producing false-positive results owing to the inheritance problem. ROC analysis using study sets with overrepresented GO terms showed a clear advantage for our approach over the standard algorithm with respect to the inheritance problem. Although there can be no gold standard for exploratory methods such as analysis of GO term overrepresentation, analysis of biological datasets suggests that our algorithm tends to identify the core GO terms that are most characteristic of the dataset being analyzed.},
- author = {Grossmann, Steffen and Bauer, Sebastian and Robinson, Peter N and Vingron, Martin},
- doi = {10.1093/bioinformatics/btm440},
- file = {:Users/ryan/Documents/Mendeley Desktop/Grossmann et al. - 2007 - Improved detection of overrepresentation of Gene-Ontology annotations with parent child analysis.pdf:pdf},
- issn = {1367-4811},
- journal = {Bioinformatics},
- keywords = {Algorithms,Gene Expression Profiling,Gene Expression Profiling: methods,Genes,Genes: genetics,Oligonucleotide Array Sequence Analysis,Oligonucleotide Array Sequence Analysis: methods,Phylogeny,Reproducibility of Results,Sensitivity and Specificity},
- month = nov,
- number = {22},
- pages = {3024--3031},
- pmid = {17848398},
- title = {Improved detection of overrepresentation of {Gene-Ontology} annotations with parent child analysis},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/17848398},
- volume = {23},
- year = {2007}
- }
- @article{Shen2013,
- abstract = {ChIP-seq is increasingly being used for genome-wide profiling of histone modification marks. It is of particular importance to compare ChIP-seq data of two different conditions, such as disease vs. control, and identify regions that show differences in ChIP enrichment. We have developed a powerful and easy to use program, called diffReps, to detect those differential sites from ChIP-seq data, with or without biological replicates. In addition, we have developed two useful tools for ChIP-seq analysis in the diffReps package: one for the annotation of the differential sites and the other for finding chromatin modification "hotspots". diffReps is developed in PERL programming language and runs on all platforms as a command line script. We tested diffReps on two different datasets. One is the comparison of H3K4me3 between two human cell lines from the ENCODE project. The other is the comparison of H3K9me3 in a discrete region of mouse brain between cocaine- and saline-treated conditions. The results indicated that diffReps is a highly sensitive program in detecting differential sites from ChIP-seq data.},
- author = {Shen, Li and Shao, Ning-Yi and Liu, Xiaochuan and Maze, Ian and Feng, Jian and Nestler, Eric J},
- doi = {10.1371/journal.pone.0065598},
- file = {:Users/ryan/Documents/Mendeley Desktop/Shen et al. - 2013 - diffReps Detecting Differential Chromatin Modification Sites from ChIP-seq Data with Biological Replicates.pdf:pdf},
- issn = {1932-6203},
- journal = {PLoS ONE},
- month = jun,
- number = {6},
- pages = {e65598},
- pmid = {23762400},
- title = {{diffReps}: Detecting Differential Chromatin Modification Sites from {ChIP-seq} Data with Biological Replicates},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/23762400},
- volume = {8},
- year = {2013}
- }
- @article{Engstrom2013,
- abstract = {High-throughput RNA sequencing is an increasingly accessible method for studying gene structure and activity on a genome-wide scale. A critical step in RNA-seq data analysis is the alignment of partial transcript reads to a reference genome sequence. To assess the performance of current mapping software, we invited developers of RNA-seq aligners to process four large human and mouse RNA-seq data sets. In total, we compared 26 mapping protocols based on 11 programs and pipelines and found major performance differences between methods on numerous benchmarks, including alignment yield, basewise accuracy, mismatch and gap placement, exon junction discovery and suitability of alignments for transcript reconstruction. We observed concordant results on real and simulated RNA-seq data, confirming the relevance of the metrics employed. Future developments in RNA-seq alignment methods would benefit from improved placement of multimapped reads, balanced utilization of existing gene annotation and a reduced false discovery rate for splice junctions.},
- author = {Engstr{\"{o}}m, P{\"{a}}r G and Steijger, Tamara and Sipos, Botond and Grant, Gregory R and Kahles, Andr{\'{e}} and Alioto, Tyler and Behr, Jonas and Bertone, Paul and Bohnert, Regina and Campagna, Davide and Davis, Carrie A and Dobin, Alexander and Gingeras, Thomas R and Goldman, Nick and Guig{\'{o}}, Roderic and Harrow, Jennifer and Hubbard, Tim J and Jean, G{\'{e}}raldine and Kosarev, Peter and Li, Sheng and Liu, Jinze and Mason, Christopher E and Molodtsov, Vladimir and Ning, Zemin and Ponstingl, Hannes and Prins, Jan F and R{\"{a}}tsch, Gunnar and Ribeca, Paolo and Seledtsov, Igor and Solovyev, Victor and Valle, Giorgio and Vitulo, Nicola and Wang, Kai and Wu, Thomas D and Zeller, Georg},
- doi = {10.1038/nmeth.2722},
- file = {:Users/ryan/Documents/Mendeley Desktop/Engstr{\"{o}}m et al. - 2013 - Systematic evaluation of spliced alignment programs for RNA-seq data.pdf:pdf},
- issn = {1548-7105},
- journal = {Nature Methods},
- month = dec,
- number = {12},
- pages = {1185--1191},
- pmid = {24185836},
- title = {Systematic evaluation of spliced alignment programs for {RNA-seq} data},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/24185836},
- volume = {10},
- year = {2013}
- }
- @article{Kauffmann2009,
- abstract = {SUMMARY: The assessment of data quality is a major concern in microarray analysis. arrayQualityMetrics is a Bioconductor package that provides a report with diagnostic plots for one or two colour microarray data. The quality metrics assess reproducibility, identify apparent outlier arrays and compute measures of signal-to-noise ratio. The tool handles most current microarray technologies and is amenable to use in automated analysis pipelines or for automatic report generation, as well as for use by individuals. The diagnosis of quality remains, in principle, a context-dependent judgement, but our tool provides powerful, automated, objective and comprehensive instruments on which to base a decision. AVAILABILITY: arrayQualityMetrics is a free and open source package, under LGPL license, available from the Bioconductor project at www.bioconductor.org. A users guide and examples are provided with the package. Some examples of HTML reports generated by arrayQualityMetrics can be found at http://www.microarray-quality.org},
- archivePrefix = {arXiv},
- arxivId = {arXiv:0710.0178v2},
- author = {Kauffmann, Audrey and Gentleman, Robert and Huber, Wolfgang},
- doi = {10.1093/bioinformatics/btn647},
- eprint = {0710.0178v2},
- file = {:Users/ryan/Documents/Mendeley Desktop/Kauffmann, Gentleman, Huber - 2009 - arrayQualityMetrics--a bioconductor package for quality assessment of microarray data.pdf:pdf},
- issn = {1367-4811},
- journal = {Bioinformatics},
- keywords = {Algorithms,Computational Biology,Computational Biology: methods,Data Interpretation, Statistical,Internet,Oligonucleotide Array Sequence Analysis,Oligonucleotide Array Sequence Analysis: methods,Oligonucleotide Array Sequence Analysis: standards,Quality Control,Software},
- month = feb,
- number = {3},
- pages = {415--416},
- pmid = {19106121},
- title = {{arrayQualityMetrics}--a bioconductor package for quality assessment of microarray data},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=2639074{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {25},
- year = {2009}
- }
- @article{Benjamini1997,
- abstract = {ABSTRACT. In this paper we offer a multiplicity of approaches and procedures for multiple testing problems with weights. Some rationale for incorporating weights in multiple hypotheses testing are discussed. Various type-I error-rates and different possible ...},
- author = {Benjamini, Yoav and Hochberg, Yosef},
- doi = {10.1111/1467-9469.00072},
- file = {:Users/ryan/Documents/Mendeley Desktop/Benjamini, Hochberg - 1997 - Multiple Hypotheses Testing with Weights.pdf:pdf},
- issn = {0303-6898},
- journal = {Scandinavian Journal of Statistics},
- keywords = {control weights,false discovery rate,family-wise error-rate,p -values,per-family error-rate,procedural weights},
- number = {3},
- pages = {407--418},
- title = {Multiple Hypotheses Testing with Weights},
- url = {http://onlinelibrary.wiley.com/doi/10.1111/1467-9469.00072/abstract},
- volume = {24},
- year = {1997}
- }
- @article{Alexa2006a,
- abstract = {MOTIVATION: The result of a typical microarray experiment is a long list of genes with corresponding expression measurements. This list is only the starting point for a meaningful biological interpretation. Modern methods identify relevant biological processes or functions from gene expression data by scoring the statistical significance of predefined functional gene groups, e.g. based on Gene Ontology (GO). We develop methods that increase the explanatory power of this approach by integrating knowledge about relationships between the GO terms into the calculation of the statistical significance.
- RESULTS: We present two novel algorithms that improve GO group scoring using the underlying GO graph topology. The algorithms are evaluated on real and simulated gene expression data. We show that both methods eliminate local dependencies between GO terms and point to relevant areas in the GO graph that remain undetected with state-of-the-art algorithms for scoring functional terms. A simulation study demonstrates that the new methods exhibit a higher level of detecting relevant biological terms than competing methods.},
- author = {Alexa, Adrian and Rahnenf{\"{u}}hrer, J{\"{o}}rg and Lengauer, Thomas},
- doi = {10.1093/bioinformatics/btl140},
- file = {:Users/ryan/Documents/Mendeley Desktop/Alexa, Rahnenf{\"{u}}hrer, Lengauer - 2006 - Improved scoring of functional groups from gene expression data by decorrelating GO graph struct.pdf:pdf},
- issn = {1367-4803},
- journal = {Bioinformatics},
- keywords = {Algorithms,Cluster Analysis,Computational Biology,Computational Biology: methods,Databases, Genetic,Gene Expression Profiling,Gene Expression Regulation,Gene Expression Regulation, Neoplastic,Humans,Leukemia,Leukemia: metabolism,Models, Statistical,Oligonucleotide Array Sequence Analysis,Protein Folding},
- month = jul,
- number = {13},
- pages = {1600--1607},
- pmid = {16606683},
- title = {Improved scoring of functional groups from gene expression data by decorrelating {GO} graph structure},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/16606683},
- volume = {22},
- year = {2006}
- }
- @article{Song2014,
- author = {Song, Li and Florea, Liliana and Langmead, Ben},
- doi = {10.1186/s13059-014-0509-9},
- file = {:Users/ryan/Documents/Mendeley Desktop/Song, Florea, Langmead - 2014 - Lighter fast and memory-efficient sequencing error correction without counting.pdf:pdf},
- issn = {1465-6906},
- journal = {Genome Biology},
- number = {11},
- pages = {509},
- pmid = {25398208},
- title = {Lighter: fast and memory-efficient sequencing error correction without counting},
- url = {http://genomebiology.com/2014/15/11/509/abstract},
- volume = {15},
- year = {2014}
- }
- @article{Pounds2006a,
- abstract = {The analysis of microarray data often involves performing a large number of statistical tests, usually at least one test per queried gene. Each test has a certain probability of reaching an incorrect inference; therefore, it is crucial to estimate or control error rates that measure the occurrence of erroneous conclusions in reporting and interpreting the results of a microarray study. In recent years, many innovative statistical methods have been developed to estimate or control various error rates for microarray studies. Researchers need guidance choosing the appropriate statistical methods for analysing these types of data sets. This review describes a family of methods that use a set of P-values to estimate or control the false discovery rate and similar error rates. Finally, these methods are classified in a manner that suggests the appropriate method for specific applications and diagnostic procedures that can identify problems in the analysis are described.},
- author = {Pounds, Stanley B.},
- doi = {10.1093/bib/bbk002},
- file = {:Users/ryan/Documents/Mendeley Desktop/Pounds - 2006 - Estimation and control of multiple testing error rates for microarray studies.pdf:pdf},
- issn = {1467-5463},
- journal = {Briefings in Bioinformatics},
- keywords = {Error rate,False discovery rate,Gene expression,Microarray,Multiple testing,Statistical analysis},
- number = {1},
- pages = {25--36},
- pmid = {16761362},
- title = {Estimation and control of multiple testing error rates for microarray studies},
- url = {http://bib.oxfordjournals.org/content/7/1/25.full.pdf},
- volume = {7},
- year = {2006}
- }
- % Key kept as Robinson2012 so existing citations still resolve; the record is the 2015 edgeR User's Guide (first author Chen), a manual rather than a journal article.
- @manual{Robinson2012,
- author = {Chen, Yunshun and McCarthy, Davis and Robinson, Mark and Smyth, Gordon K},
- file = {:Users/ryan/Documents/Mendeley Desktop/Chen et al. - 2015 - edgeR differential expression analysis of digital gene expression data User ' s Guide.pdf:pdf},
- month = apr,
- title = {{edgeR}: differential expression analysis of digital gene expression data {User's Guide}},
- year = {2015}
- }
- @article{Hesterberg2014,
- abstract = {I have three goals in this article: (1) To show the enormous potential of bootstrapping and permutation tests to help students understand statistical concepts including sampling distributions, standard errors, bias, confidence intervals, null distributions, and P-values. (2) To dig deeper, understand why these methods work and when they don't, things to watch out for, and how to deal with these issues when teaching. (3) To change statistical practice---by comparing these methods to common t tests and intervals, we see how inaccurate the latter are; we confirm this with asymptotics. n {\textgreater}= 30 isn't enough---think n {\textgreater}= 5000. Resampling provides diagnostics, and more accurate alternatives. Sadly, the common bootstrap percentile interval badly under-covers in small samples; there are better alternatives. The tone is informal, with a few stories and jokes.},
- archivePrefix = {arXiv},
- arxivId = {1411.5279},
- author = {Hesterberg, Tim},
- doi = {10.1080/00031305.2015.1089789},
- eprint = {1411.5279},
- file = {:Users/ryan/Documents/Mendeley Desktop/Hesterberg - 2014 - What Teachers Should Know about the Bootstrap Resampling in the Undergraduate Statistics Curriculum.pdf:pdf},
- issn = {0003-1305},
- journal = {The American Statistician},
- keywords = {bootstrap,permutation test,randomization test,teaching},
- number = {4},
- pages = {371--386},
- title = {What Teachers Should Know about the Bootstrap: Resampling in the Undergraduate Statistics Curriculum},
- url = {https://www.amstat.org/education/pdfs/ResamplingUndergradCurriculum.pdf},
- volume = {69},
- year = {2015}
- }
- @article{Dunham2012,
- abstract = {The human genome encodes the blueprint of life, but the function of the vast majority of its nearly three billion bases is unknown. The Encyclopedia of DNA Elements (ENCODE) project has systematically mapped regions of transcription, transcription factor association, chromatin structure and histone modification. These data enabled us to assign biochemical functions for 80{\%} of the genome, in particular outside of the well-studied protein-coding regions. Many discovered candidate regulatory elements are physically associated with one another and with expressed genes, providing new insights into the mechanisms of gene regulation. The newly identified elements also show a statistical correspondence to sequence variants linked to human disease, and can thereby guide interpretation of this variation. Overall, the project provides new insights into the organization and regulation of our genes and genome, and is an expansive resource of functional annotations for biomedical research.},
- author = {Dunham, Ian and Kundaje, Anshul and Aldred, Shelley F. and Collins, Patrick J. and Davis, Carrie A. and Doyle, Francis and Epstein, Charles B. and Frietze, Seth and Harrow, Jennifer and Kaul, Rajinder and Khatun, Jainab and Lajoie, Bryan R. and Landt, Stephen G. and Lee, Bum Kyu and Pauli, Florencia and Rosenbloom, Kate R. and Sabo, Peter and Safi, Alexias and Sanyal, Amartya and Shoresh, Noam and Simon, Jeremy M. and Song, Lingyun and Trinklein, Nathan D. and Altshuler, Robert C. and Birney, Ewan and Brown, James B. and Cheng, Chao and Djebali, Sarah and Dong, Xianjun and Ernst, Jason and Furey, Terrence S. and Gerstein, Mark and Giardine, Belinda and Greven, Melissa and Hardison, Ross C. and Harris, Robert S. and Herrero, Javier and Hoffman, Michael M. and Iyer, Sowmya and Kellis, Manolis and Kheradpour, Pouya and Lassmann, Timo and Li, Qunhua and Lin, Xinying and Marinov, Georgi K. and Merkel, Angelika and Mortazavi, Ali and Parker, Stephen C.J. and Reddy, Timothy E. and Rozowsky, Joel and Schlesinger, Felix and Thurman, Robert E. and Wang, Jie and Ward, Lucas D. and Whitfield, Troy W. and Wilder, Steven P. and Wu, Weisheng and Xi, Hualin S. and Yip, Kevin Y. and Zhuang, Jiali and Bernstein, Bradley E. and Green, Eric D. and Gunter, Chris and Snyder, Michael and Pazin, Michael J. and Lowdon, Rebecca F. and Dillon, Laura A.L. and Adams, Leslie B. and Kelly, Caroline J. and Zhang, Julia and Wexler, Judith R. and Good, Peter J. and Feingold, Elise A. and Crawford, Gregory E. and Dekker, Job and Elnitski, Laura and Farnham, Peggy J. and Giddings, Morgan C. and Gingeras, Thomas R. and Guig{\'{o}}, Roderic and Hubbard, Timothy J. and Kent, W. James and Lieb, Jason D. and Margulies, Elliott H. and Myers, Richard M. and Stamatoyannopoulos, John A. and Tenenbaum, Scott A. and Weng, Zhiping and White, Kevin P. and Wold, Barbara and Yu, Yanbao and Wrobel, John and Risk, Brian A. and Gunawardena, Harsha P. and Kuiper, Heather C. and Maier, Christopher W. 
and Xie, Ling and Chen, Xian and Mikkelsen, Tarjei S. and Gillespie, Shawn and Goren, Alon and Ram, Oren and Zhang, Xiaolan and Wang, Li and Issner, Robbyn and Coyne, Michael J. and Durham, Timothy and Ku, Manching and Truong, Thanh and Eaton, Matthew L. and Dobin, Alex and Tanzer, Andrea and Lagarde, Julien and Lin, Wei and Xue, Chenghai and Williams, Brian A. and Zaleski, Chris and R{\"{o}}der, Maik and Kokocinski, Felix and Abdelhamid, Rehab F. and Alioto, Tyler and Antoshechkin, Igor and Baer, Michael T. and Batut, Philippe and Bell, Ian and Bell, Kimberly and Chakrabortty, Sudipto and Chrast, Jacqueline and Curado, Joao and Derrien, Thomas and Drenkow, Jorg and Dumais, Erica and Dumais, Jackie and Duttagupta, Radha and Fastuca, Megan and Fejes-Toth, Kata and Ferreira, Pedro and Foissac, Sylvain and Fullwood, Melissa J. and Gao, Hui and Gonzalez, David and Gordon, Assaf and Howald, C{\'{e}}dric and Jha, Sonali and Johnson, Rory and Kapranov, Philipp and King, Brandon and Kingswood, Colin and Li, Guoliang and Luo, Oscar J. and Park, Eddie and Preall, Jonathan B. and Presaud, Kimberly and Ribeca, Paolo and Robyr, Daniel and Ruan, Xiaoan and Sammeth, Michael and Sandhu, Kuljeet Singh and Schaeffer, Lorain and See, Lei Hoon and Shahab, Atif and Skancke, Jorgen and Suzuki, Ana Maria and Takahashi, Hazuki and Tilgner, Hagen and Trout, Diane and Walters, Nathalie and Wang, Huaien and Hayashizaki, Yoshihide and Reymond, Alexandre and Antonarakis, Stylianos E. and Hannon, Gregory J. and Ruan, Yijun and Carninci, Piero and Sloan, Cricket A. and Learned, Katrina and Malladi, Venkat S. and Wong, Matthew C. and Barber, Galt P. and Cline, Melissa S. and Dreszer, Timothy R. and Heitner, Steven G. and Karolchik, Donna and Kirkup, Vanessa M. and Meyer, Laurence R. and Long, Jeffrey C. and Maddren, Morgan and Raney, Brian J. and Grasfeder, Linda L. and Giresi, Paul G. and Battenhouse, Anna and Sheffield, Nathan C. and Showers, Kimberly A. and London, Darin and Bhinge, Akshay A. 
and Shestak, Christopher and Schaner, Matthew R. and Kim, Seul Ki and Zhang, Zhuzhu Z. and Mieczkowski, Piotr A. and Mieczkowska, Joanna O. and Liu, Zheng and McDaniell, Ryan M. and Ni, Yunyun and Rashid, Naim U. and Kim, Min Jae and Adar, Sheera and Zhang, Zhancheng and Wang, Tianyuan and Winter, Deborah and Keefe, Damian and Iyer, Vishwanath R. and Zheng, Meizhen and Wang, Ping and Gertz, Jason and Vielmetter, Jost and Partridge, E. Christopher and Varley, Katherine E. and Gasper, Clarke and Bansal, Anita and Pepke, Shirley and Jain, Preti and Amrhein, Henry and Bowling, Kevin M. and Anaya, Michael and Cross, Marie K. and Muratet, Michael A. and Newberry, Kimberly M. and McCue, Kenneth and Nesmith, Amy S. and Fisher-Aylor, Katherine I. and Pusey, Barbara and DeSalvo, Gilberto and Parker, Stephanie L. and Balasubramanian, Sreeram and Davis, Nicholas S. and Meadows, Sarah K. and Eggleston, Tracy and Newberry, J. Scott and Levy, Shawn E. and Absher, Devin M. and Wong, Wing H. and Blow, Matthew J. and Visel, Axel and Pennachio, Len A. and Petrykowska, Hanna M. and Abyzov, Alexej and Aken, Bronwen and Barrell, Daniel and Barson, Gemma and Berry, Andrew and Bignell, Alexandra and Boychenko, Veronika and Bussotti, Giovanni and Davidson, Claire and Despacio-Reyes, Gloria and Diekhans, Mark and Ezkurdia, Iakes and Frankish, Adam and Gilbert, James and Gonzalez, Jose Manuel and Griffiths, Ed and Harte, Rachel and Hendrix, David A. and Hunt, Toby and Jungreis, Irwin and Kay, Mike and Khurana, Ekta and Leng, Jing and Lin, Michael F. and Loveland, Jane and Lu, Zhi and Manthravadi, Deepa and Mariotti, Marco and Mudge, Jonathan and Mukherjee, Gaurab and Notredame, Cedric and Pei, Baikang and Rodriguez, Jose Manuel and Saunders, Gary and Sboner, Andrea and Searle, Stephen and Sisu, Cristina and Snow, Catherine and Steward, Charlie and Tapanari, Electra and Tress, Michael L. and {Van Baren}, Marijke J. 
and Washietl, Stefan and Wilming, Laurens and Zadissa, Amonida and Zhang, Zhengdong and Brent, Michael and Haussler, David and Valencia, Alfonso and Addleman, Nick and Alexander, Roger P. and Auerbach, Raymond K. and Balasubramanian, Suganthi and Bettinger, Keith and Bhardwaj, Nitin and Boyle, Alan P. and Cao, Alina R. and Cayting, Philip and Charos, Alexandra and Cheng, Yong and Eastman, Catharine and Euskirchen, Ghia and Fleming, Joseph D. and Grubert, Fabian and Habegger, Lukas and Hariharan, Manoj and Harmanci, Arif and Iyengar, Sushma and Jin, Victor X. and Karczewski, Konrad J. and Kasowski, Maya and Lacroute, Phil and Lam, Hugo and Lamarre-Vincent, Nathan and Lian, Jin and Lindahl-Allen, Marianne and Min, Renqiang and Miotto, Benoit and Monahan, Hannah and Moqtaderi, Zarmik and Mu, Xinmeng J. and O'Geen, Henriette and Ouyang, Zhengqing and Patacsil, Dorrelyn and Raha, Debasish and Ramirez, Lucia and Reed, Brian and Shi, Minyi and Slifer, Teri and Witt, Heather and Wu, Linfeng and Xu, Xiaoqin and Yan, Koon Kiu and Yang, Xinqiong and Struhl, Kevin and Weissman, Sherman M. and Penalva, Luiz O. and Karmakar, Subhradip and Bhanvadia, Raj R. and Choudhury, Alina and Domanus, Marc and Ma, Lijia and Moran, Jennifer and Victorsen, Alec and Auer, Thomas and Centanin, Lazaro and Eichenlaub, Michael and Gruhl, Franziska and Heermann, Stephan and Hoeckendorf, Burkhard and Inoue, Daigo and Kellner, Tanja and Kirchmaier, Stephan and Mueller, Claudia and Reinhardt, Robert and Schertel, Lea and Schneider, Stephanie and Sinn, Rebecca and Wittbrodt, Beate and Wittbrodt, Jochen and Jain, Gaurav and Balasundaram, Gayathri and Bates, Daniel L. and Byron, Rachel and Canfield, Theresa K. and Diegel, Morgan J. and Dunn, Douglas and Ebersol, Abigail K. and Frum, Tristan and Garg, Kavita and Gist, Erica and Hansen, R. Scott and Boatman, Lisa and Haugen, Eric and Humbert, Richard and Johnson, Audra K. and Johnson, Ericka M. and Kutyavin, Tattyana V. 
and Lee, Kristen and Lotakis, Dimitra and Maurano, Matthew T. and Neph, Shane J. and Neri, Fiedencio V. and Nguyen, Eric D. and Qu, Hongzhu and Reynolds, Alex P. and Roach, Vaughn and Rynes, Eric and Sanchez, Minerva E. and Sandstrom, Richard S. and Shafer, Anthony O. and Stergachis, Andrew B. and Thomas, Sean and Vernot, Benjamin and Vierstra, Jeff and Vong, Shinny and Wang, Hao and Weaver, Molly A. and Yan, Yongqi and Zhang, Miaohua and Akey, Joshua M. and Bender, Michael and Dorschner, Michael O. and Groudine, Mark and MacCoss, Michael J. and Navas, Patrick and Stamatoyannopoulos, George and Beal, Kathryn and Brazma, Alvis and Flicek, Paul and Johnson, Nathan and Lukk, Margus and Luscombe, Nicholas M. and Sobral, Daniel and Vaquerizas, Juan M. and Batzoglou, Serafim and Sidow, Arend and Hussami, Nadine and Kyriazopoulou-Panagiotopoulou, Sofia and Libbrecht, Max W. and Schaub, Marc A. and Miller, Webb and Bickel, Peter J. and Banfai, Balazs and Boley, Nathan P. and Huang, Haiyan and Li, Jingyi Jessica and Noble, William Stafford and Bilmes, Jeffrey A. and Buske, Orion J. and Sahu, Avinash D. and Kharchenko, Peter V. and Park, Peter J. and Baker, Dannon and Taylor, James and Lochovsky, Lucas},
- doi = {10.1038/nature11247},
- file = {:Users/ryan/Documents/Mendeley Desktop/Dunham et al. - 2012 - An integrated encyclopedia of DNA elements in the human genome.pdf:pdf},
- issn = {1476-4687},
- journal = {Nature},
- number = {7414},
- pages = {57--74},
- title = {{An integrated encyclopedia of DNA elements in the human genome}},
- url = {https://www.nature.com/articles/nature11247.pdf},
- volume = {489},
- year = {2012}
- }
- @article{Searle2008,
- author = {Searle, Brian C and Turner, Mark and Nesvizhskii, Alexey I},
- doi = {10.1021/pr070540w},
- file = {:Users/ryan/Documents/Mendeley Desktop/Searle, Turner, Nesvizhskii - 2008 - Improving Sensitivity by Probabilistically Combining Results from Multiple MS MS Search Methodolog.pdf:pdf},
- journal = {Journal of Proteome Research},
- keywords = {bioinformatics,database searching,mascot,mass spectrometry,peptide identification,probability,protein identification,proteomics,sequest,x! tandem},
- number = {1},
- pages = {245--253},
- title = {{Improving Sensitivity by Probabilistically Combining Results from Multiple MS/MS Search Methodologies}},
- volume = {7},
- year = {2008}
- }
- @article{Kim2012,
- abstract = {The 15 known Fanconi anemia proteins cooperate in a pathway that regulates DNA interstrand cross-link repair. Recent studies indicate that the Fanconi anemia pathway also controls Rev1-mediated translesion DNA synthesis (TLS). We identified Fanconi anemia-associated protein (FAAP20), an integral subunit of the multisubunit Fanconi anemia core complex. FAAP20 binds to FANCA subunit and is required for stability of the complex and monoubiquitination of FANCD2. FAAP20 contains a ubiquitin-binding zinc finger 4 domain and binds to the monoubiquitinated form of Rev1. FAAP20 binding stabilizes Rev1 nuclear foci and promotes interaction of the Fanconi anemia core with PCNA-Rev1 DNA damage bypass complexes. FAAP20 therefore provides a critical link between the Fanconi anemia pathway and TLS polymerase activity. We propose that the Fanconi anemia core complex regulates cross-link repair by channeling lesions to damage bypass pathways and preventing large DNA insertions and deletions.},
- author = {Kim, Hyungjin and Yang, Kailin and Dejsuphong, Donniphat and D'Andrea, Alan D},
- doi = {10.1038/nsmb.2222},
- file = {:Users/ryan/Documents/Mendeley Desktop/Kim et al. - 2012 - Regulation of Rev1 by the Fanconi anemia core complex.pdf:pdf},
- issn = {1545-9985},
- journal = {Nature Structural {\&} Molecular Biology},
- keywords = {Fanconi Anemia Complementation Group Proteins,Fanconi Anemia Complementation Group Proteins: metabolism,Gene Expression Regulation,Nuclear Proteins,Nuclear Proteins: metabolism,Nucleotidyltransferases,Nucleotidyltransferases: metabolism,Protein Binding,Protein Stability},
- month = feb,
- number = {2},
- pages = {164--170},
- pmid = {22266823},
- title = {{Regulation of Rev1 by the Fanconi anemia core complex}},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=3280818{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {19},
- year = {2012}
- }
- @article{Leek2010,
- abstract = {High-throughput technologies are widely used, for example to assay genetic variants, gene and protein expression, and epigenetic modifications. One often overlooked complication with such studies is batch effects, which occur because measurements are affected by laboratory conditions, reagent lots and personnel differences. This becomes a major problem when batch effects are correlated with an outcome of interest and lead to incorrect conclusions. Using both published studies and our own analyses, we argue that batch effects (as well as other technical and biological artefacts) are widespread and critical to address. We review experimental and computational approaches for doing so.},
- author = {Leek, Jeffrey T. and Scharpf, Robert B. and Bravo, H{\'{e}}ctor Corrada and Simcha, David and Langmead, Benjamin and Johnson, W. Evan and Geman, Donald and Baggerly, Keith and Irizarry, Rafael A.},
- doi = {10.1038/nrg2825},
- file = {:Users/ryan/Documents/Mendeley Desktop/Leek, Scharpf, Bravo - 2010 - Tackling the widespread and critical impact of batch effects in high-throughput data.pdf:pdf},
- issn = {1471-0056},
- journal = {Nature Reviews Genetics},
- number = {10},
- pages = {733--739},
- pmid = {20838408},
- publisher = {Nature Publishing Group},
- title = {{Tackling the widespread and critical impact of batch effects in high-throughput data}},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/20838408},
- volume = {11},
- year = {2010}
- }
- @article{VanNieuwerburgh2012,
- abstract = {Standard Illumina mate-paired libraries are constructed from 3- to 5-kb DNA fragments by a blunt-end circularization. Sequencing reads that pass through the junction of the two joined ends of a 3-5-kb DNA fragment are not easy to identify and pose problems during mapping and de novo assembly. Longer read lengths increase the possibility that a read will cross the junction. To solve this problem, we developed a mate-paired protocol for use with Illumina sequencing technology that uses Cre-Lox recombination instead of blunt end circularization. In this method, a LoxP sequence is incorporated at the junction site. This sequence allows screening reads for junctions without using a reference genome. Junction reads can be trimmed or split at the junction. Moreover, the location of the LoxP sequence in the reads distinguishes mate-paired reads from spurious paired-end reads. We tested this new method by preparing and sequencing a mate-paired library with an insert size of 3 kb from Saccharomyces cerevisiae. We present an analysis of the library quality statistics and a new bio-informatics tool called DeLoxer that can be used to analyze an IlluminaCre-Lox mate-paired data set. We also demonstrate how the resulting data significantly improves a de novo assembly of the S. cerevisiae genome.},
- author = {{Van Nieuwerburgh}, Filip and Thompson, Ryan C and Ledesma, Jessica and Deforce, Dieter and Gaasterland, Terry and Ordoukhanian, Phillip and Head, Steven R},
- doi = {10.1093/nar/gkr1000},
- file = {:Users/ryan/Documents/Mendeley Desktop/Van Nieuwerburgh et al. - 2012 - Illumina mate-paired DNA sequencing-library preparation using Cre-Lox recombination.pdf:pdf},
- issn = {1362-4962},
- journal = {Nucleic Acids Research},
- month = feb,
- number = {3},
- pages = {e24},
- pmid = {22127871},
- title = {{Illumina mate-paired DNA sequencing-library preparation using Cre-Lox recombination}},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=3273786{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {40},
- year = {2012}
- }
- @article{Kim2019,
- abstract = {The human reference genome represents only a small number of individuals, which limits its usefulness for genotyping. We present a method named HISAT2 (hierarchical indexing for spliced alignment of transcripts 2) that can align both DNA and RNA sequences using a graph Ferragina Manzini index. We use HISAT2 to represent and search an expanded model of the human reference genome in which over 14.5 million genomic variants in combination with haplotypes are incorporated into the data structure used for searching and alignment. We benchmark HISAT2 using simulated and real datasets to demonstrate that our strategy of representing a population of genomes, together with a fast, memory-efficient search algorithm, provides more detailed and accurate variant analyses than other methods. We apply HISAT2 for HLA typing and DNA fingerprinting; both applications form part of the HISAT-genotype software that enables analysis of haplotype-resolved genes or genomic regions. HISAT-genotype outperforms other computational methods and matches or exceeds the performance of laboratory-based assays.},
- author = {Kim, Daehwan and Paggi, Joseph M. and Park, Chanhee and Bennett, Christopher and Salzberg, Steven L.},
- doi = {10.1038/s41587-019-0201-4},
- file = {:Users/ryan/Documents/Mendeley Desktop/Kim et al. - 2019 - Graph-based genome alignment and genotyping with HISAT2 and HISAT-genotype.pdf:pdf},
- issn = {1087-0156},
- journal = {Nature Biotechnology},
- number = {8},
- pages = {907--915},
- publisher = {Springer US},
- title = {{Graph-based genome alignment and genotyping with HISAT2 and HISAT-genotype}},
- url = {https://www.nature.com/articles/s41587-019-0201-4.pdf},
- volume = {37},
- year = {2019}
- }
- @misc{Zhu2011,
- author = {Zhu, Lihua Julie},
- file = {:Users/ryan/Documents/Mendeley Desktop/Zhu - 2011 - BioC2011 ChIPpeakAnno Practical.pdf:pdf},
- keywords = {R,annotation,chip-seq,presentation,tutorial},
- mendeley-tags = {R,annotation,chip-seq,presentation,tutorial},
- title = {{BioC2011: ChIPpeakAnno Practical}},
- url = {http://www.bioconductor.org/help/course-materials/2011/BioC2011/LabStuff/ChIPpeakAnno-BioC2011.pdf},
- year = {2011}
- }
- @article{Mukherjee2003,
- abstract = {A statistical methodology for estimating dataset size requirements for classifying microarray data using learning curves is introduced. The goal is to use existing classification results to estimate dataset size requirements for future classification experiments and to evaluate the gain in accuracy and significance of classifiers built with additional data. The method is based on fitting inverse power-law models to construct empirical learning curves. It also includes a permutation test procedure to assess the statistical significance of classification performance for a given dataset size. This procedure is applied to several molecular classification problems representing a broad spectrum of levels of complexity.},
- author = {Mukherjee, Sayan and Tamayo, Pablo and Rogers, Simon and Rifkin, Ryan and Engle, Anna and Campbell, Colin and Golub, Todd R and Mesirov, Jill P},
- doi = {10.1089/106652703321825928},
- file = {:Users/ryan/Documents/Mendeley Desktop/Mukherjee et al. - 2003 - Estimating dataset size requirements for classifying DNA microarray data.pdf:pdf},
- issn = {1066-5277},
- journal = {Journal of Computational Biology},
- keywords = {Algorithms,Computational Biology,Computational Biology: methods,Computer Simulation,Gene Expression Profiling,Gene Expression Profiling: classification,Gene Expression Profiling: methods,Humans,Models, Molecular,Neoplasms,Neoplasms: classification,Neoplasms: genetics,Neoplasms: metabolism,Oligonucleotide Array Sequence Analysis},
- month = jan,
- number = {2},
- pages = {119--142},
- pmid = {12804087},
- title = {{Estimating dataset size requirements for classifying DNA microarray data}},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/12804087},
- volume = {10},
- year = {2003}
- }
- @article{Sarkar2017,
- author = {Sarkar, Hirak and Zakeri, Mohsen and Malik, Laraib and Patro, Rob},
- doi = {10.1101/138800},
- file = {:Users/ryan/Documents/Mendeley Desktop/Sarkar et al. - 2017 - Towards selective-alignment producing accurate and sensitive alignments using quasi-mapping.pdf:pdf},
- journal = {bioRxiv},
- keywords = {alignment,mapping,quantification,rna-seq,selective alignment},
- pages = {1--14},
- title = {{Towards selective-alignment: producing accurate and sensitive alignments using quasi-mapping}},
- url = {http://www.biorxiv.org/content/biorxiv/early/2017/05/17/138800.full.pdf},
- year = {2017}
- }
- @article{Caviston2011,
- abstract = {Huntingtin (Htt) is a membrane-associated scaffolding protein that interacts with microtubule motors as well as actin-associated adaptor molecules. We examined a role for Htt in the dynein-mediated intracellular trafficking of endosomes and lysosomes. In HeLa cells depleted of either Htt or dynein, early, recycling, and late endosomes (LE)/lysosomes all become dispersed. Despite altered organelle localization, kinetic assays indicate only minor defects in intracellular trafficking. Expression of full-length Htt is required to restore organelle localization in Htt-depleted cells, supporting a role for Htt as a scaffold that promotes functional interactions along its length. In dynein-depleted cells, LE/lysosomes accumulate in tight patches near the cortex, apparently enmeshed by cortactin-positive actin filaments; Latrunculin B-treatment disperses these patches. Peripheral LE/lysosomes in dynein-depleted cells no longer colocalize with microtubules. Htt may be required for this off-loading, as the loss of microtubule association is not seen in Htt-depleted cells or in cells depleted of both dynein and Htt. Inhibition of kinesin-1 relocalizes peripheral LE/lysosomes induced by Htt depletion but not by dynein depletion, consistent with their detachment from microtubules upon dynein knockdown. Together, these data support a model of Htt as a facilitator of dynein-mediated trafficking that may regulate the cytoskeletal association of dynamic organelles.},
- author = {Caviston, Juliane P and Zajac, Allison L and Tokito, Mariko and Holzbaur, Erika L F},
- doi = {10.1091/mbc.E10-03-0233},
- file = {:Users/ryan/Documents/Mendeley Desktop/Caviston et al. - 2011 - Huntingtin coordinates the dynein-mediated dynamic positioning of endosomes and lysosomes.pdf:pdf},
- issn = {1939-4586},
- journal = {Molecular Biology of the Cell},
- keywords = {Actins,Actins: metabolism,Cell Line, Tumor,Cytoskeleton,Cytoskeleton: metabolism,Dyneins,Dyneins: genetics,Dyneins: metabolism,Endosomes,Endosomes: metabolism,Gene Knockdown Techniques,Gene Knockdown Techniques: methods,HeLa Cells,Humans,Lysosome-Associated Membrane Glycoproteins,Lysosome-Associated Membrane Glycoproteins: metabolism,Lysosomes,Lysosomes: metabolism,Microtubule-Associated Proteins,Microtubule-Associated Proteins: metabolism,Microtubule-Associated Proteins: physiology,Microtubules,Microtubules: metabolism,Microtubules: physiology,Molecular Motor Proteins,Molecular Motor Proteins: genetics,Molecular Motor Proteins: metabolism,Nerve Tissue Proteins,Nerve Tissue Proteins: genetics,Nerve Tissue Proteins: metabolism,Nuclear Proteins,Nuclear Proteins: genetics,Nuclear Proteins: metabolism,Organelles,Organelles: metabolism,Polymerization,Protein Transport,Protein Transport: physiology,RNA Interference},
- month = feb,
- number = {4},
- pages = {478--492},
- pmid = {21169558},
- title = {{Huntingtin coordinates the dynein-mediated dynamic positioning of endosomes and lysosomes}},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=3038646{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {22},
- year = {2011}
- }
- @misc{DeyKushalHsiaoJoyce2016,
- author = {Dey, Kushal and Hsiao, Joyce and Stephens, Matthew},
- file = {:Users/ryan/Documents/Mendeley Desktop/Dey Kushal, Hsiao Joyce - 2016 - CountClust Clustering and Visualizing RNA-Seq Expression Data using Grade of Membership Models.pdf:pdf},
- title = {{CountClust: Clustering and Visualizing RNA-Seq Expression Data using Grade of Membership Models}},
- url = {https://github.com/kkdey/CountClust},
- year = {2016}
- }
- @article{Gao2016,
- abstract = {Identifying latent structure in high-dimensional genomic data is essential for exploring biological processes. Here, we consider recovering gene co-expression networks from gene expression data, where each network encodes relationships between genes that are co-regulated by shared biological mechanisms. To do this, we develop a Bayesian statistical model for biclustering to infer subsets of co-regulated genes that covary in all of the samples or in only a subset of the samples. Our biclustering method, BicMix, allows overcomplete representations of the data, computational tractability, and joint modeling of unknown confounders and biological signals. Compared with related biclustering methods, BicMix recovers latent structure with higher precision across diverse simulation scenarios as compared to state-of-the-art biclustering methods. Further, we develop a principled method to recover context specific gene co-expression networks from the estimated sparse biclustering matrices. We apply BicMix to breast cancer gene expression data and to gene expression data from a cardiovascular study cohort, and we recover gene co-expression networks that are differential across ER+ and ER- samples and across male and female samples. We apply BicMix to the Genotype-Tissue Expression (GTEx) pilot data, and we find tissue specific gene networks. We validate these findings by using our tissue specific networks to identify trans-eQTLs specific to one of four primary tissues.},
- archivePrefix = {arXiv},
- arxivId = {1411.1997},
- author = {Gao, Chuan and McDowell, Ian C. and Zhao, Shiwen and Brown, Christopher D. and Engelhardt, Barbara E.},
- doi = {10.1371/journal.pcbi.1004791},
- eprint = {1411.1997},
- file = {:Users/ryan/Documents/Mendeley Desktop/Gao et al. - 2016 - Context Specific and Differential Gene Co-expression Networks via Bayesian Biclustering.pdf:pdf},
- issn = {1553-7358},
- journal = {PLoS Computational Biology},
- number = {7},
- pages = {e1004791},
- pmid = {27467526},
- title = {{Context Specific and Differential Gene Co-expression Networks via Bayesian Biclustering}},
- url = {http://journals.plos.org/ploscompbiol/article/file?id=10.1371/journal.pcbi.1004791{\&}type=printable},
- volume = {12},
- year = {2016}
- }
- @article{McDonald2016,
- abstract = {The transcriptional repressor Bcl-6 is linked to the development of both CD4(+) T follicular helper (TFH) and central memory T (TCM) cells. Here, we demonstrate that in response to decreased IL-2 signalling, T helper 1 (TH1) cells upregulate Bcl-6 and co-initiate TFH- and TCM-like gene programs, including expression of the cytokine receptors IL-6R$\alpha$ and IL-7R. Exposure of this potentially bi-potent cell population to IL-6 favours the TFH gene program, whereas IL-7 signalling represses TFH-associated genes including Bcl6 and Cxcr5, but not the TCM-related genes Klf2 and Sell. Mechanistically, IL-7-dependent activation of STAT5 contributes to Bcl-6 repression. Importantly, antigen-specific IL-6R$\alpha$(+)IL-7R(+) CD4(+) T cells emerge from the effector population at late time points post influenza infection. These data support a novel role for IL-7 in the repression of the TFH gene program and evoke a divergent regulatory mechanism by which post-effector TH1 cells may contribute to long-term cell-mediated and humoral immunity.},
- author = {McDonald, Paul W. and Read, Kaitlin A. and Baker, Chandra E. and Anderson, Ashlyn E. and Powell, Michael D. and Ballesteros-Tato, Andr{\'{e}} and Oestreich, Kenneth J.},
- doi = {10.1038/ncomms10285},
- file = {:Users/ryan/Documents/Mendeley Desktop/McDonald et al. - 2016 - IL-7 signalling represses Bcl-6 and the TFH gene program.pdf:pdf},
- issn = {2041-1723},
- journal = {Nature Communications},
- pages = {10285},
- pmid = {26743592},
- title = {{IL-7 signalling represses Bcl-6 and the TFH gene program}},
- url = {http://www.nature.com/doifinder/10.1038/ncomms10285},
- volume = {7},
- year = {2016}
- }
- @article{Gomes2014,
- abstract = {The comprehension of protein and DNA binding in vivo is essential to understand gene regulation. Chromatin immunoprecipitation followed by sequencing (ChIP-seq) provides a global map of the regulatory binding network. Most ChIP-seq analysis tools focus on identifying binding regions from coverage enrichment. However, less work has been performed to infer the physical and regulatory details inside the enriched regions. This research extends a previous blind-deconvolution approach to develop a post-peak-calling algorithm that improves binding site resolution and predicts cooperative interactions. At the core of our new method is a physically motivated model that characterizes the binding signal as an extreme value distribution. This model suggests a mathematical framework to study physical properties of DNA shearing from the ChIP-seq coverage. The model explains the ChIP-seq coverage with two signals: The first considers DNA fragments with only a single binding event, whereas the second considers fragments with two binding events (a double-binding signal). The model incorporates motif discovery and is able to detect multiple sites in an enriched region with single-nucleotide resolution, high sensitivity, and high specificity. Our method improves peak caller sensitivity, from less than 45{\%} up to 94{\%}, at a false positive rate {\textless} 11{\%} for a set of 47 experimentally validated prokaryotic sites. It also improves resolution of highly enriched regions of large-scale eukaryotic data sets. The double-binding signal provides a novel application in ChIP-seq analysis: the identification of cooperative interaction. Predictions of known cooperative binding sites show a 0.85 area under an ROC curve.},
- author = {Gomes, Antonio L C and Abeel, Thomas and Peterson, Matthew and Azizi, Elham and Lyubetskaya, Anna and Carvalho, Lu{\'{i}}s and Galagan, James},
- doi = {10.1101/gr.161711.113},
- file = {:Users/ryan/Documents/Mendeley Desktop/Gomes et al. - 2014 - Decoding ChIP-seq with a double-binding signal refines binding peaks to single-nucleotides and predicts cooperativ.pdf:pdf},
- issn = {1549-5469},
- journal = {Genome Research},
- month = oct,
- number = {10},
- pages = {1686--1697},
- pmid = {25024162},
- title = {{Decoding ChIP-seq with a double-binding signal refines binding peaks to single-nucleotides and predicts cooperative interaction}},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=PMC4199365},
- volume = {24},
- year = {2014}
- }
- @article{Roge2013,
- abstract = {SUMMARY: With the advances of RNA sequencing technologies, scientists need new tools to analyze transcriptome data. We introduce RNAseqViewer, a new visualization tool dedicated to RNA-Seq data. The program offers innovative ways to represent transcriptome data for single or multiple samples. It is a handy tool for scientists who use RNA-Seq data to compare multiple transcriptomes, for example, to compare gene expression and alternative splicing of cancer samples or of different development stages.
- AVAILABILITY: RNAseqViewer is freely available for academic use at http://bioinfo.au.tsinghua.edu.cn/software/RNAseqViewer/ CONTACT: zhangxg@tsinghua.edu.cn SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.},
- author = {Rog{\'{e}}, Xavier and Zhang, Xuegong},
- doi = {10.1093/bioinformatics/btt649},
- file = {:Users/ryan/Documents/Mendeley Desktop/Rog{\'{e}}, Zhang - 2013 - RNAseqViewer Visualization tool for RNA-Seq data.pdf:pdf},
- issn = {1367-4811},
- journal = {Bioinformatics},
- month = nov,
- pages = {2--3},
- pmid = {24215023},
- title = {{RNAseqViewer: Visualization tool for RNA-Seq data}},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/24215023},
- year = {2013}
- }
- @article{Sheng2013,
- abstract = {We propose an adaptive truncated product method that facilitates the selection of the truncation point among a set of candidates. To efficiently estimate the distribution of the proposed method when the p-values are correlated, we develop a single-layer bootstrap procedure.},
- author = {Sheng, Xuguang and Yang, Jingyun},
- file = {:Users/ryan/Documents/Mendeley Desktop/Sheng, Yang - 2013 - An adaptive truncated product method for combining dependent p-values.pdf:pdf},
- issn = {0165-1765},
- journal = {Economics Letters},
- keywords = {adaptive,p-value,panel cointegration,purchasing power parity,truncated product method},
- number = {2},
- pages = {180--182},
- pmid = {23935232},
- title = {An adaptive truncated product method for combining dependent p-values},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=PMC3736994},
- volume = {119},
- year = {2013}
- }
- @manual{Smyth2012,
- author = {Smyth, Gordon K and Ritchie, Matthew and Thorne, Natalie},
- file = {:Users/ryan/Documents/Mendeley Desktop/Smyth, Ritchie, Thorne - 2012 - Linear Models for Microarray Data User ' s Guide ( Now Including RNA-Seq Data Analysis ).pdf:pdf},
- month = jun,
- title = {Linear Models for Microarray Data User's Guide (Now Including {RNA-Seq} Data Analysis)},
- year = {2012}
- }
- @article{Dobin2012,
- abstract = {MOTIVATION: Accurate alignment of high-throughput RNA-seq data is a challenging and yet unsolved problem because of the non-contiguous transcript structure, relatively short read lengths and constantly increasing throughput of the sequencing technologies. Currently available RNA-seq aligners suffer from high mapping error rates, low mapping speed, read length limitation and mapping biases. RESULTS: To align our large (exceeding 80 billon reads) ENCODE Transcriptome RNA-seq dataset we developed the Spliced Transcripts Alignment to a Reference (STAR) software based on a previously un-described RNA-seq alignment algorithm which utilizes sequential maximum mappable seed search in uncompressed suffix arrays followed by seed clustering and stitching procedure. STAR outperforms other aligners by more than a factor of 50 in mapping speed, aligning to the human genome 550 Million 2x76bp paired-end reads per hour on a modest 12-core server, while at the same time improving alignment sensitivity and precision. In addition to unbiased de novo detection of canonical junctions, STAR can discover non-canonical splices and chimeric (fusion) transcripts, and is also capable of mapping full length RNA sequences. Using Roche 454 sequencing of RT-PCR amplicons, we experimentally validated 1,960 novel intergenic splice junctions with an 80-90{\%} success rate, corroborating the high precision of the STAR mapping strategy.Implementation and AVAILABILITY: STAR is implemented as a standalone C++ code. STAR is free open source software distributed under GPLv3 license and can be downloaded from http://code.google.com/p/rna-star/ CONTACT: dobin@cshl.edu.},
- author = {Dobin, Alexander and Davis, Carrie A and Schlesinger, Felix and Drenkow, Jorg and Zaleski, Chris and Jha, Sonali and Batut, Philippe and Chaisson, Mark and Gingeras, Thomas R},
- doi = {10.1093/bioinformatics/bts635},
- file = {:Users/ryan/Documents/Mendeley Desktop/Dobin et al. - 2013 - STAR ultrafast universal RNA-seq aligner.pdf:pdf},
- issn = {1460-2059},
- journal = {Bioinformatics},
- keywords = {Algorithms,Cluster Analysis,Gene Expression Profiling,Genome,Human,Humans,RNA,RNA Splicing,RNA: methods,Sequence Alignment,Sequence Alignment: methods,Sequence Analysis,Software},
- month = jan,
- number = {1},
- pages = {15--21},
- pmid = {23104886},
- title = {{STAR}: ultrafast universal {RNA-seq} aligner},
- url = {https://academic.oup.com/bioinformatics/article-lookup/doi/10.1093/bioinformatics/bts635},
- volume = {29},
- year = {2013}
- }
- @article{Manno2017,
- abstract = {RNA abundance is a powerful indicator of the state of individual cells, but does not directly reveal dynamic processes such as cellular differentiation. Here we show that RNA velocity - the time derivative of RNA abundance - can be estimated by distinguishing unspliced and spliced mRNAs in standard single-cell RNA sequencing protocols. We show that RNA velocity is a vector that predicts the future state of individual cells on a timescale of hours. We validate the accuracy of RNA velocity in the neural crest lineage, demonstrate its use on multiple technical platforms, reconstruct the branching lineage tree of the mouse hippocampus, and measure RNA kinetics in human embryonic brain. We expect RNA velocity to greatly aid the analysis of developmental lineages and cellular dynamics, particularly in humans.},
- author = {La Manno, Gioele and Soldatov, Ruslan and Hochgerner, Hannah and Zeisel, Amit and Petukhov, Viktor and Kastriti, Maria and Lonnerberg, Peter and Furlan, Alessandro and Fan, Jean and Liu, Zehua and van Bruggen, David and Guo, Jimin and Sundstrom, Erik and Castelo-Branco, Goncalo and Adameyko, Igor and Linnarsson, Sten and Kharchenko, Peter},
- doi = {10.1101/206052},
- file = {:Users/ryan/Documents/Mendeley Desktop/Manno et al. - 2017 - RNA velocity in single cells.pdf:pdf},
- journal = {bioRxiv},
- pages = {206052},
- title = {{RNA} velocity in single cells},
- url = {https://www.biorxiv.org/content/early/2017/10/19/206052},
- year = {2017}
- }
- @article{Langmead2012,
- abstract = {As the rate of sequencing increases, greater throughput is demanded from read aligners. The full-text minute index is often used to make alignment very fast and memory-efficient, but the approach is ill-suited to finding longer, gapped alignments. Bowtie 2 combines the strengths of the full-text minute index with the flexibility and speed of hardware-accelerated dynamic programming algorithms to achieve a combination of high speed, sensitivity and accuracy.},
- author = {Langmead, Ben and Salzberg, Steven L},
- doi = {10.1038/nmeth.1923},
- file = {:Users/ryan/Documents/Mendeley Desktop/Langmead, Salzberg - 2012 - Fast gapped-read alignment with Bowtie 2.pdf:pdf},
- issn = {1548-7105},
- journal = {Nature Methods},
- keywords = {Algorithms,Computational Biology,Computational Biology: methods,Databases, Genetic,Genome, Human,Genome, Human: genetics,Humans,Sequence Alignment,Sequence Alignment: methods,Sequence Analysis, DNA,Sequence Analysis, DNA: methods},
- month = apr,
- number = {4},
- pages = {357--359},
- pmid = {22388286},
- title = {Fast gapped-read alignment with {Bowtie} 2},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/22388286},
- volume = {9},
- year = {2012}
- }
- @article{Turro2013,
- abstract = {MOTIVATION: Most methods for estimating differential expression from RNA-seq are based on statistics that compare normalized read counts between treatment classes. Unfortunately, reads are in general too short to be mapped unambiguously to features of interest, such as genes, isoforms or haplotype-specific isoforms. There are methods for estimating expression levels that account for this source of ambiguity. However, the uncertainty is not generally accounted for in downstream analysis of gene expression experiments. Moreover, at the individual transcript level, it can sometimes be too large to allow useful comparisons between treatment groups. RESULTS: In this article we make two proposals that improve the power, specificity and versatility of expression analysis using RNA-seq data. First, we present a Bayesian method for model selection that accounts for read mapping ambiguities using random effects. This polytomous model selection approach can be used to identify many interesting patterns of gene expression and is not confined to detecting differential expression between two groups. For illustration, we use our method to detect imprinting, different types of regulatory divergence in cis and in trans and differential isoform usage, but many other applications are possible. Second, we present a novel collapsing algorithm for grouping transcripts into inferential units that exploits the posterior correlation between transcript expression levels. The aggregate expression levels of these units can be estimated with useful levels of uncertainty. Our algorithm can improve the precision of expression estimates when uncertainty is large with only a small reduction in biological resolution.},
- author = {Turro, Ernest and Astle, William J and Tavar{\'{e}}, Simon},
- doi = {10.1093/bioinformatics/btt624},
- file = {:Users/ryan/Documents/Mendeley Desktop/Turro et al. - 2014 - Flexible analysis of RNA-seq data using mixed effects models.pdf:pdf;:Users/ryan/Documents/Mendeley Desktop/Turro et al. - 2014 - Flexible analysis of RNA-seq data using mixed effects models(2).pdf:pdf},
- issn = {1367-4811},
- journal = {Bioinformatics},
- month = jan,
- number = {2},
- pages = {180--188},
- pmid = {24281695},
- title = {Flexible analysis of {RNA-seq} data using mixed effects models},
- url = {http://bioinformatics.oxfordjournals.org/content/30/2/180.short},
- volume = {30},
- year = {2014}
- }
- @article{Si2013,
- abstract = {The recent RNA-seq technology is an attractive method to study gene expression. One of the most important goals in RNA-seq data analysis is to detect genes differentially expressed across treatments. Although several statistical methods have been published, there are no theoretical justifications for whether these methods are optimal or how to search for the optimal test. Furthermore, most proposed tests are designed for testing whether the mean expression levels are exactly the same or not across treatments, whereas sometimes, biologists are interested in detecting genes with expression changes larger than a certain threshold. Another issue with current methods is that the false discovery rate (FDR) control is not well studied. In this manuscript, we propose a test to address all the above issues. Under model assumptions, we derive an optimal test that achieves the maximum of average power among those that control FDR at the same level. We also provide an approximated version, the approximated most average powerful (AMAP) test, for practical implementation. The proposed method allows for testing null hypotheses that are much more general than the ones most previous studies have considered, and it leads to a natural way of controlling the FDR. Through simulation studies, we show that our test has a higher power than other methods, including the widely-used edgeR, DESeq, and baySeq methods, as well as better FDR control than two other FDR control procedures commonly used in practice. For demonstration, we also apply the proposed method to a real RNA-seq dataset obtained from maize.},
- author = {Si, Yaqing and Liu, Peng},
- doi = {10.1111/biom.12036},
- file = {:Users/ryan/Documents/Mendeley Desktop/Si, Liu - 2013 - An optimal test with maximum average power while controlling FDR with application to RNA-seq data.pdf:pdf},
- issn = {1541-0420},
- journal = {Biometrics},
- keywords = {empirical bayes,fdr control,gene expression,maximum average power,rna-seq},
- month = jul,
- pages = {1--12},
- pmid = {23889143},
- title = {An optimal test with maximum average power while controlling {FDR} with application to {RNA-seq} data},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/23889143},
- year = {2013}
- }
- @article{Hummel2008,
- abstract = {MOTIVATION: Several authors have studied expression in gene sets with specific goals: overrepresentation of interesting genes in functional groups, predictive power for class membership and searches for groups where the constituent genes show coordinated changes in expression under the experimental conditions. The purpose of this article is to follow the third direction. One important aspect is that the gene sets under analysis are known a priori and are not determined from the experimental data at hand. Our goal is to provide a methodology that helps to identify the relevant structural constituents (phenotypical, experimental design, biological component) that determine gene expression in a group.
- RESULTS: Gene-wise linear models are used to formalize the structural aspects of a study. The full model is contrasted with a reduced model that lacks the relevant design component. A comparison with respect to goodness of fit is made and quantified. An asymptotic test and a permutation test are derived to test the null hypothesis that the reduced model sufficiently explains the observed expression within the gene group of interest. Graphical tools are available to illustrate and interpret the results of the analysis. Examples demonstrate the wide range of application.
- AVAILABILITY: The R-package GlobalAncova (http://www.bioconductor.org) offers data and functions as well as a vignette to guide the user through specific analysis steps.},
- author = {Hummel, Manuela and Meister, Reinhard and Mansmann, Ulrich},
- doi = {10.1093/bioinformatics/btm531},
- file = {:Users/ryan/Documents/Mendeley Desktop/Hummel, Meister, Mansmann - 2008 - GlobalANCOVA exploration and assessment of gene group effects.pdf:pdf},
- issn = {1367-4811},
- journal = {Bioinformatics},
- keywords = {Algorithms,Computer Simulation,Gene Expression Profiling,Gene Expression Profiling: methods,Models, Biological,Multigene Family,Multigene Family: physiology,Proteome,Proteome: metabolism,Signal Transduction,Signal Transduction: physiology,Software,User-Computer Interface},
- month = jan,
- number = {1},
- pages = {78--85},
- pmid = {18024976},
- title = {{GlobalANCOVA}: exploration and assessment of gene group effects},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/18024976},
- volume = {24},
- year = {2008}
- }
- @article{Schatz2007,
- abstract = {The recent availability of new, less expensive high-throughput DNA sequencing technologies has yielded a dramatic increase in the volume of sequence data that must be analyzed. These data are being generated for several purposes, including genotyping, genome resequencing, metagenomics, and de novo genome assembly projects. Sequence alignment programs such as MUMmer have proven essential for analysis of these data, but researchers will need ever faster, high-throughput alignment tools running on inexpensive hardware to keep up with new sequence technologies.},
- author = {Schatz, Michael C and Trapnell, Cole and Delcher, Arthur L and Varshney, Amitabh},
- doi = {10.1186/1471-2105-8-474},
- file = {:Users/ryan/Documents/Mendeley Desktop/Schatz et al. - 2007 - High-throughput sequence alignment using Graphics Processing Units.pdf:pdf},
- issn = {1471-2105},
- journal = {BMC Bioinformatics},
- keywords = {Animals,Bacillus anthracis,Bacillus anthracis: genetics,Base Sequence,Caenorhabditis,Caenorhabditis: genetics,Computer Graphics,Computer Graphics: economics,Computer Graphics: instrumentation,Computers,Computers: economics,Contig Mapping,Contig Mapping: economics,Contig Mapping: instrumentation,DNA,DNA: ultrastructure,Database Management Systems,Databases, Genetic,Genomic Library,Listeria monocytogenes,Listeria monocytogenes: genetics,Sequence Alignment,Sequence Alignment: economics,Sequence Alignment: instrumentation,Sequence Alignment: methods,Sequence Analysis, DNA,Sequence Analysis, DNA: economics,Sequence Analysis, DNA: instrumentation,Sequence Analysis, DNA: methods,Streptococcus suis,Streptococcus suis: genetics,Time Factors,Work Simplification},
- month = jan,
- pages = {474},
- pmid = {18070356},
- title = {High-throughput sequence alignment using Graphics Processing Units},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=2222658{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {8},
- year = {2007}
- }
- @article{Efron2002,
- author = {Efron, Bradley and Tibshirani, Robert},
- doi = {10.1002/gepi.1124},
- file = {:Users/ryan/Documents/Mendeley Desktop/Efron, Tibshirani - 2002 - Empirical Bayes Methods and False Discovery Rates for Microarrays.pdf:pdf},
- journal = {Genetic Epidemiology},
- keywords = {a posteriori probability of,gene,multiple comparisons,simultaneous hypothesis tests},
- month = jun,
- number = {1},
- pages = {70--86},
- title = {Empirical {Bayes} Methods and False Discovery Rates for Microarrays},
- volume = {23},
- year = {2002}
- }
- @article{Liang2012a,
- abstract = {Increasing number of ChIP-seq experiments are investigating transcription factor binding under multiple experimental conditions, for example, various treatment conditions, several distinct time points and different treatment dosage levels. Hence, identifying differential binding sites across multiple conditions is of practical importance in biological and medical research. To this end, we have developed a powerful and flexible program, called DBChIP, to detect differentially bound sharp binding sites across multiple conditions, with or without matching control samples. By assigning uncertainty measure to the putative differential binding sites, DBChIP facilitates downstream analysis. DBChIP is implemented in R programming language and can work with a wide range of sequencing file formats.},
- author = {Liang, Kun and Keles, S{\"{u}}nd{\"{u}}z},
- doi = {10.1093/bioinformatics/btr605},
- file = {:Users/ryan/Documents/Mendeley Desktop/Liang, Keles - 2012 - Detecting differential binding of transcription factors with ChIP-seq.pdf:pdf},
- issn = {1367-4811},
- journal = {Bioinformatics},
- month = jan,
- number = {1},
- pages = {121--122},
- pmid = {22057161},
- title = {Detecting differential binding of transcription factors with {ChIP-seq}},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/22057161},
- volume = {28},
- year = {2012}
- }
- @article{Zerbino2018,
- abstract = {Ensembl (www.ensembl.org) is a database and genome browser for enabling research on vertebrate genomes. We import, analyse, curate and integrate a diverse collection of large-scale reference data to create a more comprehensive view of genome biology than would be possible from any individual dataset. Our extensive data resources include evidence-based gene and regulatory region annotation, genome variation and gene trees. An accompanying suite of tools, infrastructure and programmatic access methods ensure uniform data analysis and distribution for all supported species. Together, these provide a comprehensive solution for large-scale and targeted genomics applications alike. Among many other developments over the past year, we have improved our resources for gene regulation and comparative genomics, and added CRISPR/Cas9 target sites. We released new browser functionality and tools, including improved filtering and prioritization of genome variation, Manhattan plot visualization for linkage disequilibrium and eQTL data, and an ontology search for phenotypes, traits and disease. We have also enhanced data discovery and access with a track hub registry and a selection of new REST end points. All Ensembl data are freely released to the scientific community and our source code is available via the open source Apache 2.0 license.},
- author = {Zerbino, Daniel R. and Achuthan, Premanand and Akanni, Wasiu and Amode, M Ridwan and Barrell, Daniel and Bhai, Jyothish and Billis, Konstantinos and Cummins, Carla and Gall, Astrid and Garc{\'{i}}a Gir{\'{o}}n, Carlos and Gil, Laurent and Gordon, Leo and Haggerty, Leanne and Haskell, Erin and Hourlier, Thibaut and Izuogu, Osagie G. and Janacek, Sophie H. and Juettemann, Thomas and To, Jimmy Kiang and Laird, Matthew R. and Lavidas, Ilias and Liu, Zhicheng and Loveland, Jane E. and Maurel, Thomas and McLaren, William and Moore, Benjamin and Mudge, Jonathan and Murphy, Daniel N. and Newman, Victoria and Nuhn, Michael and Ogeh, Denye and Ong, Chuang Kee and Parker, Anne and Patricio, Mateus and Riat, Harpreet Singh and Schuilenburg, Helen and Sheppard, Dan and Sparrow, Helen and Taylor, Kieron and Thormann, Anja and Vullo, Alessandro and Walts, Brandon and Zadissa, Amonida and Frankish, Adam and Hunt, Sarah E. and Kostadima, Myrto and Langridge, Nicholas and Martin, Fergal J. and Muffato, Matthieu and Perry, Emily and Ruffier, Magali and Staines, Dan M. and Trevanion, Stephen J. and Aken, Bronwen L. and Cunningham, Fiona and Yates, Andrew and Flicek, Paul},
- doi = {10.1093/nar/gkx1098},
- file = {:Users/ryan/Documents/Mendeley Desktop/Zerbino et al. - 2018 - Ensembl 2018.pdf:pdf},
- issn = {0305-1048},
- journal = {Nucleic Acids Research},
- month = jan,
- number = {D1},
- pages = {D754--D761},
- title = {{Ensembl} 2018},
- url = {http://academic.oup.com/nar/article/46/D1/D754/4634002},
- volume = {46},
- year = {2018}
- }
- @article{Jin2009,
- abstract = {To understand how chromatin structure is organized by different histone variants, we have measured the genome-wide distribution of nucleosome core particles (NCPs) containing the histone variants H3.3 and H2A.Z in human cells. We find that a special class of NCPs containing both variants is enriched at 'nucleosome-free regions' of active promoters, enhancers and insulator regions. We show that preparative methods used previously in studying nucleosome structure result in the loss of these unstable double-variant NCPs. It seems likely that this instability facilitates the access of transcription factors to promoters and other regulatory sites in vivo. Other combinations of variants have different distributions, consistent with distinct roles for histone variants in the modulation of gene expression.},
- author = {Jin, Chunyuan and Zang, Chongzhi and Wei, Gang and Cui, Kairong and Peng, Weiqun and Zhao, Keji and Felsenfeld, Gary},
- doi = {10.1038/ng.409},
- file = {:Users/ryan/Documents/Mendeley Desktop/Jin et al. - 2009 - H3.3H2A.Z double variant–containing nucleosomes mark 'nucleosome-free regions' of active promoters and other regul.pdf:pdf},
- issn = {1061-4036},
- journal = {Nature Genetics},
- month = aug,
- number = {8},
- pages = {941--945},
- publisher = {Nature Publishing Group},
- title = {{H3.3/H2A.Z} double variant--containing nucleosomes mark `nucleosome-free regions' of active promoters and other regulatory regions},
- url = {http://www.nature.com/articles/ng.409},
- volume = {41},
- year = {2009}
- }
- @article{Warden2013,
- abstract = {BD-Func (BiDirectional FUNCtional enrichment) is an algorithm that calculates functional enrichment by comparing lists of pre-defined genes that are known to be activated versus inhibited in a pathway or by a regulatory molecule. This paper shows that BD-Func can correctly predict cell line alternations and patient characteristics with accuracy comparable to popular algorithms, with a significantly faster run-time. BD-Func can compare scores for individual samples across multiple groups as well as provide predictive statistics and receiver operating characteristic (ROC) plots to quantify the accuracy of the signature associated with a binary phenotypic variable. BD-Func facilitates collaboration and reproducibility by encouraging users to share novel molecular signatures in the BD-Func discussion group, which is where the novel progesterone receptor and LBH589 signatures from this paper can be found. The novel LBH589 signature presented in this paper also serves as a case study showing how a custom signature using cell line data can accurately predict activity in vivo. This software is available to download at https://sourceforge.net/projects/bdfunc/.},
- author = {Warden, Charles D and Kanaya, Noriko and Chen, Shiuan and Yuan, Yate-Ching},
- doi = {10.7717/peerj.159},
- file = {:Users/ryan/Documents/Mendeley Desktop/Warden et al. - 2013 - BD-Func a streamlined algorithm for predicting activation and inhibition of pathways.pdf:pdf},
- issn = {2167-8359},
- journal = {PeerJ},
- month = jan,
- pages = {e159},
- pmid = {24058887},
- title = {{BD-Func}: a streamlined algorithm for predicting activation and inhibition of pathways},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=3775632{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {1},
- year = {2013}
- }
- @article{Klein2015,
- abstract = {In the biology of tissue development and diseases, DNA methylation plays an important role. For a deeper understanding, it is crucial to accurately compare DNA methylation patterns between groups of samples representing different conditions. A widely used method to investigate DNA methylation in the CpG context is bisulfite sequencing, which produces data on the single-nucleotide scale. While there are benefits to analyzing CpG sites on a basepair level, there are both biological and statistical reasons to test entire genomic regions for differential methylation. However, the analysis of DNA methylation is hampered by the lack of best practice standards. Here, we compared multiple approaches for testing predefined genomic regions for differential DNA methylation in bisulfite sequencing data. Nine methods were evaluated: BiSeq, COHCAP, Goeman's Global Test, Limma, methylKit/eDMR, RADMeth and three log-linear regression approaches with different distribution assumptions. We applied these methods to simulated data and determined their sensitivity and specificity. This revealed performance differences, which were also seen when applied to real data. Methods that first test single CpG sites and then test regions based on transformed CpG-wise P-values performed better than methods that summarize methylation levels or raw reads. Interestingly, smoothing of methylation levels had a negligible impact. In particular, Global Test, BiSeq and RADMeth/z-test outperformed the other methods we evaluated, providing valuable guidance for more accurate analysis of DNA methylation.},
- author = {Klein, Hans-Ulrich and Hebestreit, Katja},
- doi = {10.1093/bib/bbv095},
- file = {:Users/ryan/Documents/Mendeley Desktop/Klein, Hebestreit - 2015 - An evaluation of methods to test predefined genomic regions for differential methylation in bisulfite sequenc.pdf:pdf},
- issn = {1467-5463},
- journal = {Briefings in Bioinformatics},
- keywords = {bisulfite sequencing,differentially methylated regions,dna methylation},
- month = sep,
- pages = {bbv095},
- pmid = {26515532},
- title = {An evaluation of methods to test predefined genomic regions for differential methylation in bisulfite sequencing data},
- url = {http://bib.oxfordjournals.org/lookup/doi/10.1093/bib/bbv095},
- year = {2015}
- }
- @article{Holt2011,
- abstract = {Second-generation sequencing technologies are precipitating major shifts with regards to what kinds of genomes are being sequenced and how they are annotated. While the first generation of genome projects focused on well-studied model organisms, many of today's projects involve exotic organisms whose genomes are largely terra incognita. This complicates their annotation, because unlike first-generation projects, there are no pre-existing 'gold-standard' gene-models with which to train gene-finders. Improvements in genome assembly and the wide availability of mRNA-seq data are also creating opportunities to update and re-annotate previously published genome annotations. Today's genome projects are thus in need of new genome annotation tools that can meet the challenges and opportunities presented by second-generation sequencing technologies.},
- author = {Holt, Carson and Yandell, Mark},
- doi = {10.1186/1471-2105-12-491},
- file = {:Users/ryan/Documents/Mendeley Desktop/Holt, Yandell - 2011 - MAKER2 an annotation pipeline and genome-database management tool for second-generation genome projects.pdf:pdf},
- issn = {1471-2105},
- journal = {BMC Bioinformatics},
- keywords = {Animals,Databases, Genetic,Genome,High-Throughput Nucleotide Sequencing,High-Throughput Nucleotide Sequencing: methods,Humans,Molecular Sequence Annotation,Plants,Plants: genetics,Software},
- month = jan,
- number = {1},
- pages = {491},
- pmid = {22192575},
- publisher = {BioMed Central Ltd},
- title = {{MAKER2}: an annotation pipeline and genome-database management tool for second-generation genome projects},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=3280279{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {12},
- year = {2011}
- }
- @article{Koster2012,
- abstract = {SUMMARY: Snakemake is a workflow engine that provides a readable Python-based workflow definition language and a powerful execution environment that scales from single-core workstations to compute clusters without modifying the workflow. It is the first system to support the use of automatically inferred multiple named wildcards (or variables) in input and output filenames. AVAILABILITY: http://snakemake.googlecode.com. CONTACT: johannes.koester@uni-due.de.},
- author = {K{\"{o}}ster, Johannes and Rahmann, Sven},
- doi = {10.1093/bioinformatics/bts480},
- file = {:Users/ryan/Documents/Mendeley Desktop/K{\"{o}}ster, Rahmann - 2012 - Snakemake-a scalable bioinformatics workflow engine.pdf:pdf},
- issn = {1367-4803},
- journal = {Bioinformatics},
- number = {19},
- pages = {2520--2522},
- pmid = {22908215},
- title = {{Snakemake}---a scalable bioinformatics workflow engine},
- url = {http://bioinformatics.oxfordjournals.org/content/early/2012/08/17/bioinformatics.bts480.full.pdf},
- volume = {28},
- year = {2012}
- }
- @article{Huber2015,
- author = {Huber, Wolfgang and Carey, Vincent J and Gentleman, Robert and Anders, Simon and Carlson, Marc and Carvalho, Benilton S and Corrada Bravo, Hector and Davis, Sean and Gatto, Laurent and Girke, Thomas and Gottardo, Raphael and Hahne, Florian and Hansen, Kasper D and Irizarry, Rafael A and Lawrence, Michael and Love, Michael I and Macdonald, James and Obenchain, Valerie and Ole{\'{s}}, Andrzej K and Pag{\`{e}}s, Herv{\'{e}} and Reyes, Alejandro and Shannon, Paul and Smyth, Gordon K and Tenenbaum, Dan and Waldron, Levi and Morgan, Martin},
- doi = {10.1038/nmeth.3252},
- file = {:Users/ryan/Documents/Mendeley Desktop/Huber et al. - 2015 - Orchestrating high-throughput genomic analysis with Bioconductor.pdf:pdf},
- issn = {1548-7091},
- journal = {Nature Methods},
- number = {2},
- pages = {115--121},
- publisher = {Nature Publishing Group},
- title = {Orchestrating high-throughput genomic analysis with {Bioconductor}},
- url = {http://dx.doi.org/10.1038/nmeth.3252},
- volume = {12},
- year = {2015}
- }
- @article{VandeWiel2016,
- abstract = {For many high-dimensional studies, additional information on the variables, like (genomic) annotation or external p-values, is available. In the context of binary and continuous prediction, we develop a method for adaptive group-regularized (logistic) ridge regression, which makes structural use of such 'co-data'. Here, 'groups' refer to a partition of the variables according to the co-data. We derive empirical Bayes estimates of group-specific penalties, which possess several nice properties: (i) They are analytical. (ii) They adapt to the informativeness of the co-data for the data at hand. (iii) Only one global penalty parameter requires tuning by cross-validation. In addition, the method allows use of multiple types of co-data at little extra computational effort. We show that the group-specific penalties may lead to a larger distinction between 'near-zero' and relatively large regression parameters, which facilitates post hoc variable selection. The method, termed GRridge, is implemented in an easy-to-use R-package. It is demonstrated on two cancer genomics studies, which both concern the discrimination of precancerous cervical lesions from normal cervix tissues using methylation microarray data. For both examples, GRridge clearly improves the predictive performances of ordinary logistic ridge regression and the group lasso. In addition, we show that for the second study, the relatively good predictive performance is maintained when selecting only 42 variables.},
- archivePrefix = {arXiv},
- arxivId = {1411.3496v1},
- author = {van de Wiel, Mark A. and Lien, Tonje G. and Verlaat, Wina and van Wieringen, Wessel N. and Wilting, Saskia M.},
- doi = {10.1002/sim.6732},
- eprint = {1411.3496v1},
- file = {:Users/ryan/Documents/Mendeley Desktop/van de Wiel et al. - 2016 - Better prediction by use of co-data Adaptive group-regularized ridge regression.pdf:pdf},
- issn = {1097-0258},
- journal = {Statistics in Medicine},
- keywords = {Classification,Empirical Bayes,Logistic ridge regression,Methylation,Random forest,Variable selection},
- number = {3},
- pages = {368--381},
- pmid = {26365903},
- title = {Better prediction by use of co-data: Adaptive group-regularized ridge regression},
- url = {https://arxiv.org/pdf/1411.3496.pdf},
- volume = {35},
- year = {2016}
- }
- @article{Chang2008,
- abstract = {BACKGROUND: Alternative RNA splicing greatly increases proteome diversity and thereby contribute to species- or tissue-specific functions. The possibility to study alternative splicing (AS) events on a genomic scale using splicing-sensitive microarrays, including the Affymetrix GeneChip Exon 1.0 ST microarray (exon array), has appeared very recently. However, the application of this new technology is hindered by the lack of free and user-friendly software devoted to these novel platforms.
- RESULTS: In this study we present a Java-based freeware, easyExon http://microarray.ym.edu.tw/easyexon, to process, filtrate and visualize exon array data with an analysis pipeline. This tool implements the most commonly used probeset summarization methods as well as AS-orientated filtration algorithms, e.g. MIDAS and PAC, for the detection of alternative splicing events. We include a biological filtration function according to GO terms, and provide a module to visualize and interpret the selected exons and transcripts. Furthermore, easyExon can integrate with other related programs, such as Integrate Genome Browser (IGB) and Affymetrix Power Tools (APT), to make the whole analysis more comprehensive. We applied easyExon on a public accessible colon cancer dataset as an example to illustrate the analysis pipeline of this tool.
- CONCLUSION: EasyExon can efficiently process and analyze the Affymetrix exon array data. The simplicity, flexibility and brevity of easyExon make it a valuable tool for AS event identification in genomic research.},
- author = {Chang, Ting-Yu and Li, Yin-Yi and Jen, Chih-Hung and Yang, Tsun-Po and Lin, Chi-Hung and Hsu, Ming-Ta and Wang, Hsei-Wei},
- doi = {10.1186/1471-2105-9-432},
- file = {:Users/ryan/Documents/Mendeley Desktop/Chang et al. - 2008 - easyExon--a Java-based GUI tool for processing and visualization of Affymetrix exon array data.pdf:pdf},
- issn = {1471-2105},
- journal = {BMC Bioinformatics},
- keywords = {Alternative Splicing,Alternative Splicing: genetics,Animals,Exons,Gene Expression Profiling,Gene Expression Profiling: methods,Humans,Information Storage and Retrieval,Information Storage and Retrieval: methods,Mice,Oligonucleotide Array Sequence Analysis,Oligonucleotide Array Sequence Analysis: methods,Rats,User-Computer Interface},
- month = oct,
- pages = {432},
- pmid = {18851762},
- title = {{easyExon--a Java-based GUI tool for processing and visualization of Affymetrix exon array data}},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=2579307{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {9},
- year = {2008}
- }
- @article{Jeanmougin2010,
- abstract = {High-throughput post-genomic studies are now routinely and promisingly investigated in biological and biomedical research. The main statistical approach to select genes differentially expressed between two groups is to apply a t-test, which is subject of criticism in the literature. Numerous alternatives have been developed based on different and innovative variance modeling strategies. However, a critical issue is that selecting a different test usually leads to a different gene list. In this context and given the current tendency to apply the t-test, identifying the most efficient approach in practice remains crucial. To provide elements to answer, we conduct a comparison of eight tests representative of variance modeling strategies in gene expression data: Welch's t-test, ANOVA [1], Wilcoxon's test, SAM [2], RVM [3], limma [4], VarMixt [5] and SMVar [6]. Our comparison process relies on four steps (gene list analysis, simulations, spike-in data and re-sampling) to formulate comprehensive and robust conclusions about test performance, in terms of statistical power, false-positive rate, execution time and ease of use. Our results raise concerns about the ability of some methods to control the expected number of false positives at a desirable level. Besides, two tests (limma and VarMixt) show significant improvement compared to the t-test, in particular to deal with small sample sizes. In addition limma presents several practical advantages, so we advocate its application to analyze gene expression data.},
- author = {Jeanmougin, Marine and de Reynies, Aurelien and Marisa, Laetitia and Paccard, Caroline and Nuel, Gregory and Guedj, Mickael},
- doi = {10.1371/journal.pone.0012336},
- file = {:Users/ryan/Documents/Mendeley Desktop/Jeanmougin et al. - 2010 - Should we abandon the t-test in the analysis of gene expression microarray data a comparison of variance mode.pdf:pdf},
- issn = {1932-6203},
- journal = {PLoS ONE},
- keywords = {Analysis of Variance,Computer Simulation,Gene Expression Profiling,Gene Expression Profiling: statistics {\&} numerical,Models, Statistical,Oligonucleotide Array Sequence Analysis,Oligonucleotide Array Sequence Analysis: statistic},
- month = sep,
- number = {9},
- pages = {e12336},
- pmid = {20838429},
- title = {{Should we abandon the t-test in the analysis of gene expression microarray data: a comparison of variance modeling strategies}},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=2933223{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {5},
- year = {2010}
- }
- @misc{Zitnik,
- abstract = {New technologies have enabled the investigation of biology and human health at an unprecedented scale and in multiple dimensions. These dimensions include myriad properties describing genome, epigenome, transcriptome, microbiome, phenotype, and lifestyle. No single data type, however, can capture the complexity of all the factors relevant to understanding a phenomenon such as a disease. Integrative methods that combine data from multiple technologies have thus emerged as critical statistical and computational approaches. The key challenge in developing such approaches is the identification of effective models to provide a comprehensive and relevant systems view. An ideal method can answer a biological or medical question, identifying important features and predicting outcomes, by harnessing heterogeneous data across several dimensions of biological variation. In this Review, we describe the principles of data integration and discuss current methods and available implementations. We provide examples of successful data integration in biology and medicine. Finally, we discuss current challenges in biomedical integrative methods and our perspective on the future development of the field.},
- archivePrefix = {arXiv},
- arxivId = {1807.00123},
- author = {Zitnik, Marinka and Nguyen, Francis and Wang, Bo and Leskovec, Jure and Goldenberg, Anna and Hoffman, Michael M},
- eprint = {1807.00123},
- file = {:Users/ryan/Documents/Mendeley Desktop/Zitnik et al. - Unknown - Machine Learning for Integrating Data in Biology and Medicine Principles, Practice, and Opportunities.pdf:pdf},
- keywords = {computational biology,heterogeneous data,machine learning,personalized medicine,systems biology},
- title = {{Machine Learning for Integrating Data in Biology and Medicine: Principles, Practice, and Opportunities}},
- url = {https://arxiv.org/pdf/1807.00123.pdf},
- year = {2018}
- }
- @article{Stadler2011,
- abstract = {Methylation of cytosines is an essential epigenetic modification in mammalian genomes, yet the rules that govern methylation patterns remain largely elusive. To gain insights into this process, we generated base-pair-resolution mouse methylomes in stem cells and neuronal progenitors. Advanced quantitative analysis identified low-methylated regions (LMRs) with an average methylation of 30{\%}. These represent CpG-poor distal regulatory regions as evidenced by location, DNase I hypersensitivity, presence of enhancer chromatin marks and enhancer activity in reporter assays. LMRs are occupied by DNA-binding factors and their binding is necessary and sufficient to create LMRs. A comparison of neuronal and stem-cell methylomes confirms this dependency, as cell-type-specific LMRs are occupied by cell-type-specific transcription factors. This study provides methylome references for the mouse and shows that DNA-binding factors locally influence DNA methylation, enabling the identification of active regulatory regions.},
- author = {Stadler, Michael B and Murr, Rabih and Burger, Lukas and Ivanek, Robert and Lienert, Florian and Sch{\"{o}}ler, Anne and Wirbelauer, Christiane and Oakeley, Edward J and Gaidatzis, Dimos and Tiwari, Vijay K and Sch{\"{u}}beler, Dirk},
- doi = {10.1038/nature10716},
- file = {:Users/ryan/Documents/Mendeley Desktop/Stadler et al. - 2011 - DNA-binding factors shape the mouse methylome at distal regulatory regions.pdf:pdf},
- issn = {1476-4687},
- journal = {Nature},
- keywords = {Animals,Cell Differentiation,CpG Islands,Cytosine,Cytosine: metabolism,DNA Methylation,DNA-Binding Proteins,DNA-Binding Proteins: metabolism,Embryonic Stem Cells,Embryonic Stem Cells: cytology,Epigenomics,Mice,Neurons,Neurons: cytology,Promoter Regions, Genetic,Promoter Regions, Genetic: genetics,Protein Binding,Stem Cells,Stem Cells: cytology,Transcription Factors,Transcription Factors: metabolism},
- month = dec,
- number = {7378},
- pages = {490--495},
- pmid = {22170606},
- title = {{DNA-binding factors shape the mouse methylome at distal regulatory regions}},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/22170606},
- volume = {480},
- year = {2011}
- }
- @article{Kerr2000,
- abstract = {Spotted cDNA microarrays are emerging as a powerful and cost-effective tool for large-scale analysis of gene expression. Microarrays can be used to measure the relative quantities of specific mRNAs in two or more tissue samples for thousands of genes simultaneously. While the power of this technology has been recognized, many open questions remain about appropriate analysis of microarray data. One question is how to make valid estimates of the relative expression for genes that are not biased by ancillary sources of variation. Recognizing that there is inherent "noise" in microarray data, how does one estimate the error variation associated with an estimated change in expression, i.e., how does one construct the error bars? We demonstrate that ANOVA methods can be used to normalize microarray data and provide estimates of changes in gene expression that are corrected for potential confounding effects. This approach establishes a framework for the general analysis and interpretation of microarray data.},
- author = {Kerr, M K and Martin, M and Churchill, G A},
- doi = {10.1089/10665270050514954},
- file = {:Users/ryan/Documents/Mendeley Desktop/Kerr, Martin, Churchill - 2000 - Analysis of variance for gene expression microarray data.pdf:pdf;:Users/ryan/Documents/Mendeley Desktop/Kerr, Martin, Churchill - 2000 - Analysis of variance for gene expression microarray data(2).pdf:pdf},
- issn = {1066-5277},
- journal = {Journal of Computational Biology},
- keywords = {Computer-Assisted,Female,Humans,Image Processing,Least-Squares Analysis,Liver,Liver: physiology,Male,Muscle,Oligonucleotide Array Sequence Analysis,Oligonucleotide Array Sequence Analysis: methods,Placenta,Placenta: physiology,Pregnancy,Reproducibility of Results,Skeletal,Skeletal: physiology},
- month = dec,
- number = {6},
- pages = {819--837},
- pmid = {11382364},
- title = {{Analysis of variance for gene expression microarray data}},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=3268235{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {7},
- year = {2000}
- }
- @article{Spyrou2009,
- abstract = {High-throughput sequencing technology has become popular and widely used to study protein and DNA interactions. Chromatin immunoprecipitation, followed by sequencing of the resulting samples, produces large amounts of data that can be used to map genomic features such as transcription factor binding sites and histone modifications.},
- author = {Spyrou, Christiana and Stark, Rory and Lynch, Andy G and Tavar{\'{e}}, Simon},
- doi = {10.1186/1471-2105-10-299},
- file = {:Users/ryan/Documents/Mendeley Desktop/Spyrou et al. - 2009 - BayesPeak Bayesian analysis of ChIP-seq data.pdf:pdf},
- issn = {1471-2105},
- journal = {BMC Bioinformatics},
- keywords = {Bayes Theorem,Binding Sites,Chromatin Immunoprecipitation,Chromatin Immunoprecipitation: methods,Computational Biology,Computational Biology: methods,DNA,DNA: chemistry,DNA: metabolism,Proteins,Proteins: metabolism},
- month = sep,
- pages = {299},
- pmid = {19772557},
- title = {{BayesPeak: Bayesian analysis of ChIP-seq data}},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=2760534{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {10},
- year = {2009}
- }
- @article{Au2013,
- abstract = {Although transcriptional and posttranscriptional events are detected in RNA-Seq data from second-generation sequencing, full-length mRNA isoforms are not captured. On the other hand, third-generation sequencing, which yields much longer reads, has current limitations of lower raw accuracy and throughput. Here, we combine second-generation sequencing and third-generation sequencing with a custom-designed method for isoform identification and quantification to generate a high-confidence isoform dataset for human embryonic stem cells (hESCs). We report 8,084 RefSeq-annotated isoforms detected as full-length and an additional 5,459 isoforms predicted through statistical inference. Over one-third of these are novel isoforms, including 273 RNAs from gene loci that have not previously been identified. Further characterization of the novel loci indicates that a subset is expressed in pluripotent cells but not in diverse fetal and adult tissues; moreover, their reduced expression perturbs the network of pluripotency-associated genes. Results suggest that gene identification, even in well-characterized human cell lines and tissues, is likely far from complete.},
- author = {Au, Kin Fai and Sebastiano, Vittorio and Afshar, Pegah Tootoonchi and Durruthy, Jens Durruthy and Lee, Lawrence and Williams, Brian A and van Bakel, Harm and Schadt, Eric E and Reijo-Pera, Renee A and Underwood, Jason G and Wong, Wing Hung},
- doi = {10.1073/pnas.1320101110},
- file = {:Users/ryan/Documents/Mendeley Desktop/Au et al. - 2013 - Characterization of the human ESC transcriptome by hybrid sequencing.pdf:pdf},
- issn = {1091-6490},
- journal = {Proceedings of the National Academy of Sciences of the United States of America},
- month = nov,
- number = {50},
- pages = {E4821--E4830},
- pmid = {24282307},
- title = {{Characterization of the human ESC transcriptome by hybrid sequencing}},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/24282307},
- volume = {110},
- year = {2013}
- }
- @article{Wan2012,
- author = {Wan, Lin and Sun, Fengzhu},
- doi = {10.1109/TCBB.2012.83},
- file = {:Users/ryan/Documents/Mendeley Desktop/Wan, Sun - 2012 - CEDER Accurate detection of differentially expressed genes by combining significance of exons using RNA-Seq.pdf:pdf},
- journal = {IEEE/ACM Transactions on Computational Biology and Bioinformatics},
- number = {5},
- pages = {1281--1292},
- title = {{CEDER: Accurate detection of differentially expressed genes by combining significance of exons using RNA-Seq}},
- url = {http://ieeexplore.ieee.org/xpls/abs{\_}all.jsp?arnumber=6205734},
- volume = {9},
- year = {2012}
- }
- @article{Jones2016,
- author = {Jones, Daniel C and Kuppusamy, Kavitha T and Palpant, Nathan J and Peng, Xinxia and Murry, Charles E and Ruohola-Baker, Hannele and Ruzzo, Walter L},
- doi = {10.1101/088765},
- file = {:Users/ryan/Documents/Mendeley Desktop/Jones et al. - 2016 - Isolator accurate and stable analysis of isoform-level expression in RNA-Seq experiments.pdf:pdf},
- journal = {bioRxiv},
- title = {{Isolator: accurate and stable analysis of isoform-level expression in RNA-Seq experiments}},
- url = {http://biorxiv.org/content/biorxiv/early/2016/11/20/088765.full.pdf},
- year = {2016}
- }
- @article{Storey2003,
- abstract = {Multiple hypothesis testing is concerned with controlling the rate of false positives when testing several hypotheses simultaneously. One multiple hypothesis testing error measure is the false discovery rate (FDR), which is loosely defined to be the expected proportion of false positives among all significant hypotheses. The FDR is especially appropriate for exploratory analyses in which one is interested in finding several significant results among many tests. In this work, we introduce a modified version of the FDR called the " positive false discovery rate " (pFDR). We discuss the advantages and disadvantages of the pFDR and investigate its statistical properties. When assuming the test statistics follow a mixture distribution, we show that the pFDR can be written as a Bayesian posterior probability and can be connected to classification theory. These properties remain asymptotically true under fairly general conditions, even under certain forms of dependence. Also, a new quantity called the " q-value " is introduced and investigated, which is a natural " Bayesian posterior p-value, " or rather the pFDR analogue of the p-value.},
- author = {Storey, John D.},
- doi = {10.1214/aos/1074290335},
- file = {:Users/ryan/Documents/Mendeley Desktop/Storey - 2003 - The positive false discovery rate A Bayesian interpretation and the q-value.pdf:pdf},
- issn = {0090-5364},
- journal = {Annals of Statistics},
- keywords = {Multiple comparisons,Simultaneous inference,p-values,pFDR,pFNR,q-values},
- number = {6},
- pages = {2013--2035},
- pmid = {3448445},
- title = {{The positive false discovery rate: A Bayesian interpretation and the q-value}},
- url = {https://projecteuclid.org/download/pdf{\_}1/euclid.aos/1074290335},
- volume = {31},
- year = {2003}
- }
- @article{McCarthy2012,
- abstract = {A flexible statistical framework is developed for the analysis of read counts from RNA-Seq gene expression studies. It provides the ability to analyse complex experiments involving multiple treatment conditions and blocking variables while still taking full account of biological variation. Biological variation between RNA samples is estimated separately from the technical variation associated with sequencing technologies. Novel empirical Bayes methods allow each gene to have its own specific variability, even when there are relatively few biological replicates from which to estimate such variability. The pipeline is implemented in the edgeR package of the Bioconductor project. A case study analysis of carcinoma data demonstrates the ability of generalized linear model methods (GLMs) to detect differential expression in a paired design, and even to detect tumour-specific expression changes. The case study demonstrates the need to allow for gene-specific variability, rather than assuming a common dispersion across genes or a fixed relationship between abundance and variability. Genewise dispersions de-prioritize genes with inconsistent results and allow the main analysis to focus on changes that are consistent between biological replicates. Parallel computational approaches are developed to make non-linear model fitting faster and more reliable, making the application of GLMs to genomic data more convenient and practical. Simulations demonstrate the ability of adjusted profile likelihood estimators to return accurate estimators of biological variability in complex situations. When variation is gene-specific, empirical Bayes estimators provide an advantageous compromise between the extremes of assuming common dispersion or separate genewise dispersion. The methods developed here can also be applied to count data arising from DNA-Seq applications, including ChIP-Seq for epigenetic marks and DNA methylation analyses.},
- author = {McCarthy, Davis J and Chen, Yunshun and Smyth, Gordon K},
- doi = {10.1093/nar/gks042},
- file = {:Users/ryan/Documents/Mendeley Desktop/McCarthy, Chen, Smyth - 2012 - Differential expression analysis of multifactor RNA-Seq experiments with respect to biological variation.pdf:pdf},
- issn = {1362-4962},
- journal = {Nucleic Acids Research},
- month = may,
- number = {10},
- pages = {4288--4297},
- pmid = {22287627},
- title = {{Differential expression analysis of multifactor RNA-Seq experiments with respect to biological variation}},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=3378882{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {40},
- year = {2012}
- }
- @article{Silberzahn2018,
- abstract = {Twenty-nine teams involving 61 analysts used the same dataset to address the same research question: whether soccer referees are more likely to give red cards to dark skin toned players than light skin toned players. Analytic approaches varied widely across teams, and estimated effect sizes ranged from 0.89 to 2.93 in odds ratio units, with a median of 1.31. Twenty teams (69{\%}) found a statistically significant positive effect and nine teams (31{\%}) observed a non-significant relationship. Crowdsourcing data analysis, a strategy by which numerous research teams are recruited to simultaneously investigate the same research question, makes transparent how variations in analytical choices affect results.},
- author = {Silberzahn, R. and Uhlmann, E. L. and Martin, D. P. and Anselmi, P. and Aust, F. and Awtrey, E. and Bahn{\'{i}}k, {\v{S}}. and Bai, F. and Bannard, C. and Bonnier, E. and Carlsson, R. and Cheung, F. and Christensen, G. and Clay, R. and Craig, M. A. and {Dalla Rosa}, A. and Dam, L. and Evans, M. H. and {Flores Cervantes}, I. and Fong, N. and Gamez-Djokic, M. and Glenz, A. and Gordon-McKeon, S. and Heaton, T. J. and Hederos, K. and Heene, M. and {Hofelich Mohr}, A. J. and H{\"{o}}gden, F. and Hui, K. and Johannesson, M. and Kalodimos, J. and Kaszubowski, E. and Kennedy, D. M. and Lei, R. and Lindsay, T. A. and Liverani, S. and Madan, C. R. and Molden, D. and Molleman, E. and Morey, R. D. and Mulder, L. B. and Nijstad, B. R. and Pope, N. G. and Pope, B. and Prenoveau, J. M. and Rink, F. and Robusto, E. and Roderique, H. and Sandberg, A. and Schl{\"{u}}ter, E. and Sch{\"{o}}nbrodt, F. D. and Sherman, M. F. and Sommer, S. A. and Sotak, K. and Spain, S. and Sp{\"{o}}rlein, C. and Stafford, T. and Stefanutti, L. and Tauber, S. and Ullrich, J. and Vianello, M. and Wagenmakers, E.-J. and Witkowiak, M. and Yoon, S. and Nosek, B. A.},
- doi = {10.1177/2515245917747646},
- file = {:Users/ryan/Documents/Mendeley Desktop/Silberzahn et al. - 2018 - Many Analysts, One Data Set Making Transparent How Variations in Analytic Choices Affect Results.pdf:pdf},
- issn = {2515-2459},
- journal = {Advances in Methods and Practices in Psychological Science},
- month = sep,
- number = {3},
- pages = {337--356},
- title = {{Many Analysts, One Data Set: Making Transparent How Variations in Analytic Choices Affect Results}},
- url = {http://journals.sagepub.com/doi/10.1177/2515245917747646},
- volume = {1},
- year = {2018}
- }
- @article{Zaykin2002,
- abstract = {We present a new procedure for combining P-values from a set of L hypothesis tests. Our procedure is to take the product of only those P-values less than some specified cut-off value and to evaluate the probability of such a product, or a smaller value, under the overall hypothesis that all L hypotheses are true. We give an explicit formulation for this P-value, and find by simulation that it can provide high power for detecting departures from the overall hypothesis. We extend the procedure to situations when tests are not independent. We present both real and simulated examples where the method is especially useful. These include exploratory analyses when L is large, such as genome-wide scans for marker-trait associations and meta-analytic applications that combine information from published studies, with potential for dealing with the "publication bias" phenomenon. Once the overall hypothesis is rejected, an adjustment procedure with strong family-wise error protection is available for smaller subsets of hypotheses, down to the individual tests.},
- author = {Zaykin, D. V. and Zhivotovsky, Lev A. and Westfall, P. H. and Weir, B. S.},
- doi = {10.1002/gepi.0042},
- file = {:Users/ryan/Documents/Mendeley Desktop/Zaykin et al. - 2002 - Truncated product method for combining P-values.pdf:pdf},
- issn = {0741-0395},
- journal = {Genetic Epidemiology},
- keywords = {Bonferroni,Genome-wide scans,Meta-analysis,Microarrays,Multiple tests},
- number = {2},
- pages = {170--185},
- pmid = {11788962},
- title = {{Truncated product method for combining P-values}},
- url = {http://statgen.ncsu.edu/zaykin/some/TruncProdMethod.pdf},
- volume = {22},
- year = {2002}
- }
- @article{Sarwal2003,
- abstract = {BACKGROUND: The causes and clinical course of acute rejection vary, and it is not possible to predict graft outcome reliably on the basis of available clinical, pathological, and genetic markers. We hypothesized that previously unrecognized molecular heterogeneity might underlie some of the variability in the clinical course of acute renal allograft rejection and in its response to treatment.
- METHODS: We used DNA microarrays in a systematic study of gene-expression patterns in biopsy samples from normal and dysfunctional renal allografts. A combination of exploratory and supervised bioinformatic methods was used to analyze these profiles.
- RESULTS: We found consistent differences among the gene-expression patterns associated with acute rejection, nephrotoxic effects of drugs, chronic allograft nephropathy, and normal kidneys. The gene-expression patterns associated with acute rejection suggested at least three possible distinct subtypes of acute rejection that, although indistinguishable by light microscopy, were marked by differences in immune activation and cellular proliferation. Since the gene-expression patterns pointed to substantial variation in the composition of immune infiltrates, we used immunohistochemical staining to define these subtypes further. This analysis revealed a striking association between dense CD20+ B-cell infiltrates and both clinical glucocorticoid resistance (P=0.01) and graft loss (P{\textless}0.001).
- CONCLUSIONS: Systematic analysis of gene-expression patterns provides a window on the biology and pathogenesis of renal allograft rejection. Biopsy samples from patients with acute rejection that are indistinguishable on conventional histologic analysis reveal extensive differences in gene expression, which are associated with differences in immunologic and cellular features and clinical course. The presence of dense clusters of B cells in a biopsy sample was strongly associated with severe graft rejection, suggesting a pivotal role of infiltrating B cells in acute rejection.},
- author = {Sarwal, Minnie and Chua, Mei-Sze and Kambham, Neeraja and Hsieh, Szu-Chuan and Satterwhite, Thomas and Masek, Marilyn and Salvatierra, Oscar},
- doi = {10.1056/NEJMoa035588},
- file = {:Users/ryan/Documents/Mendeley Desktop/Sarwal et al. - 2003 - Molecular heterogeneity in acute renal allograft rejection identified by DNA microarray profiling.pdf:pdf},
- issn = {1533-4406},
- journal = {The New England Journal of Medicine},
- keywords = {Acute Disease,Adolescent,Adult,Antigens, CD20,Antigens, CD20: analysis,B-Lymphocytes,B-Lymphocytes: immunology,Biopsy,Child,Child, Preschool,Computational Biology,Drug Resistance,Drug Resistance: immunology,Gene Expression,Gene Expression Profiling,Genetic Heterogeneity,Glucocorticoids,Glucocorticoids: therapeutic use,Graft Rejection,Graft Rejection: classification,Graft Rejection: drug therapy,Graft Rejection: genetics,Graft Rejection: immunology,Humans,Immunohistochemistry,Infant,Kidney,Kidney Transplantation,Kidney Transplantation: immunology,Kidney Transplantation: pathology,Kidney: immunology,Kidney: pathology,Oligonucleotide Array Sequence Analysis,T-Lymphocytes,Transplantation, Homologous},
- month = jul,
- number = {2},
- pages = {125--138},
- pmid = {12853585},
- title = {{Molecular heterogeneity in acute renal allograft rejection identified by DNA microarray profiling}},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/12853585},
- volume = {349},
- year = {2003}
- }
- @article{Crane2012,
- author = {Crane, Brian R},
- doi = {10.1126/science.1224611},
- file = {:Users/ryan/Documents/Mendeley Desktop/Crane - 2012 - Biochemistry. Nature's intricate clockwork.pdf:pdf},
- issn = {1095-9203},
- journal = {Science},
- keywords = {ARNTL Transcription Factors,ARNTL Transcription Factors: chemistry,Animals,CLOCK Proteins,CLOCK Proteins: chemistry,Circadian Rhythm,Humans,Transcriptional Activation},
- month = jul,
- number = {6091},
- pages = {165--166},
- pmid = {22798591},
- title = {{Nature's intricate clockwork}},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/22798591},
- volume = {337},
- year = {2012}
- }
- @article{Hamfjord2012,
- abstract = {We present the results of a global study of dysregulated miRNAs in paired samples of normal mucosa and tumor from eight patients with colorectal cancer. Although there is existing data of miRNA contribution to colorectal tumorigenesis, these studies are typically small to medium scale studies of cell lines or non-paired tumor samples. The present study is to our knowledge unique in two respects. Firstly, the normal and adjacent tumor tissue samples are paired, thus taking into account the baseline differences between individuals when testing for differential expression. Secondly, we use high-throughput sequencing, thus enabling a comprehensive survey of all miRNAs expressed in the tissues. We use Illumina sequencing technology to perform sequencing and two different tools to statistically test for differences in read counts per gene between samples: edgeR when using the pair information and DESeq when ignoring this information, i.e., treating tumor and normal samples as independent groups. We identify 37 miRNAs that are significantly dysregulated in both statistical approaches, 19 down-regulated and 18 up-regulated. Some of these miRNAs are previously published as potential regulators in colorectal adenocarcinomas such as miR-1, miR-96 and miR-145. Our comprehensive survey of differentially expressed miRNAs thus confirms some existing findings. We have also discovered 16 dysregulated miRNAs, which to our knowledge have not previously been associated with colorectal carcinogenesis: the following significantly down-regulated miR-490-3p, -628-3p/-5p, -1297, -3151, -3163, -3622a-5p, -3656 and the up-regulated miR-105, -549, -1269, -1827, -3144-3p, -3177, -3180-3p, -4326. Although the study is preliminary with only eight patients included, we believe the results add to the present knowledge on miRNA dysregulation in colorectal carcinogenesis. As such the results would serve as a robust training set for validation of potential biomarkers in a larger cohort study. Finally, we also present data supporting the hypothesis that there are differences in miRNA expression between adenocarcinomas and neuroendocrine tumors of the colon.},
- author = {Hamfjord, Julian and Stangeland, Astrid M and Hughes, Timothy and Skrede, Martina L and Tveit, Kjell M and Ikdahl, Tone and Kure, Elin H},
- doi = {10.1371/journal.pone.0034150},
- file = {:Users/ryan/Documents/Mendeley Desktop/Hamfjord et al. - 2012 - Differential expression of miRNAs in colorectal cancer comparison of paired tumor tissue and adjacent normal mu.pdf:pdf},
- issn = {1932-6203},
- journal = {PLoS ONE},
- month = apr,
- number = {4},
- pages = {e34150},
- pmid = {22529906},
- title = {{Differential expression of miRNAs in colorectal cancer: comparison of paired tumor tissue and adjacent normal mucosa using high-throughput sequencing}},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=3328481{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {7},
- year = {2012}
- }
- @article{Etz2016,
- abstract = {We revisit the results of the recent Reproducibility Project: Psychology by the Open Science Collaboration. We compute Bayes factors---a quantity that can be used to express comparative evidence for an hypothesis but also for the null hypothesis---for a large subset (N = 72) of the original papers and their corresponding replication attempts. In our computation, we take into account the likely scenario that publication bias had distorted the originally published results. Overall, 75{\%} of studies gave qualitatively similar results in terms of the amount of evidence provided. However, the evidence was often weak (i.e., Bayes factor {\textless} 10). The majority of the studies (64{\%}) did not provide strong evidence for either the null or the alternative hypothesis in either the original or the replication, and no replication attempts provided strong evidence in favor of the null. In all cases where the original paper provided strong evidence but the replication did not (15{\%}), the sample size in the replication was smaller than the original. Where the replication provided strong evidence but the original did not (10{\%}), the replication sample size was larger. We conclude that the apparent failure of the Reproducibility Project to replicate many target effects can be adequately explained by overestimation of effect sizes (or overestimation of evidence against the null hypothesis) due to small sample sizes and publication bias in the psychological literature. We further conclude that traditional sample sizes are insufficient and that a more widespread adoption of Bayesian methods is desirable.},
- author = {Etz, Alexander and Vandekerckhove, Joachim},
- doi = {10.1371/journal.pone.0149794},
- file = {:Users/ryan/Documents/Mendeley Desktop/Etz, Vandekerckhove - 2016 - A Bayesian perspective on the reproducibility project Psychology.pdf:pdf},
- issn = {1932-6203},
- journal = {PLoS ONE},
- number = {2},
- pages = {1--12},
- pmid = {26919473},
- title = {{A Bayesian perspective on the reproducibility project: Psychology}},
- url = {http://journals.plos.org/plosone/article/file?id=10.1371/journal.pone.0149794{\&}type=printable},
- volume = {11},
- year = {2016}
- }
- @misc{Girke2012,
- abstract = {Advanced R/Bioconductor Workshop on High-Throughput Genetic Analysis},
- author = {Girke, Thomas},
- file = {:Users/ryan/Documents/Mendeley Desktop/Girke - 2012 - ChIP-Seq Analysis with R and Bioconductor.pdf:pdf},
- keywords = {presentation,tutorial},
- mendeley-tags = {presentation,tutorial},
- note = {Fred Hutchinson Cancer Research Center},
- title = {{ChIP-Seq Analysis with R and Bioconductor}},
- url = {http://faculty.ucr.edu/{~}tgirke/HTML{\_}Presentations/Manuals/Rngsapps/chipseqBioc2012/Rchipseq.pdf},
- year = {2012}
- }
- @article{Wang2014a,
- author = {Wang, Pin and Xue, Yiquan and Han, Yanmei and Lin, Li and Wu, Cong and Xu, Sheng and Jiang, Zhengping and Xu, Junfang and Liu, Qiuyan and Cao, Xuetao},
- doi = {10.1126/science.1251456},
- file = {:Users/ryan/Documents/Mendeley Desktop/Wang et al. - 2014 - The STAT3-Binding Long Noncoding RNA lnc-DC Controls Human Dendritic Cell Differentiation.pdf:pdf},
- journal = {Science},
- month = {apr},
- number = {6181},
- pages = {310--313},
- title = {{The STAT3-Binding Long Noncoding RNA lnc-DC Controls Human Dendritic Cell Differentiation}},
- volume = {344},
- year = {2014}
- }
- @article{Dougherty2010,
- abstract = {Classification in bioinformatics often suffers from small samples in conjunction with large numbers of features, which makes error estimation problematic. When a sample is small, there is insufficient data to split the sample and the same data are used for both classifier design and error estimation. Error estimation can suffer from high variance, bias, or both. The problem of choosing a suitable error estimator is exacerbated by the fact that estimation performance depends on the rule used to design the classifier, the feature-label distribution to which the classifier is to be applied, and the sample size. This paper reviews the performance of training-sample error estimators with respect to several criteria: estimation accuracy, variance, bias, correlation with the true error, regression on the true error, and accuracy in ranking feature sets. A number of error estimators are considered: resubstitution, leave-one-out cross-validation, 10-fold cross-validation, bolstered resubstitution, semi-bolstered resubstitution, .632 bootstrap, .632+ bootstrap, and optimal bootstrap. It illustrates these performance criteria for certain models and for two real data sets, referring to the literature for more extensive applications of these criteria. The results given in the present paper are consistent with those in the literature and lead to two conclusions: (1) much greater effort needs to be focused on error estimation, and (2) owing to the generally poor performance of error estimators on small samples, for a conclusion based on a small-sample error estimator to be considered valid, it should be supported by evidence that the estimator in question can be expected to perform sufficiently well under the circumstances to justify the conclusion.},
- author = {Dougherty, Edward R. and Sima, Chao and Hanczar, Blaise and Braga-Neto, Ulisses M.},
- doi = {10.2174/157489310790596385},
- file = {:Users/ryan/Documents/Mendeley Desktop/Dougherty et al. - 2010 - Performance of Error Estimators for Classification.pdf:pdf},
- issn = {1574-8936},
- journal = {Current Bioinformatics},
- keywords = {classification,epistemology,error estimation,validity},
- number = {1},
- pages = {53--67},
- title = {{Performance of Error Estimators for Classification}},
- url = {https://pdfs.semanticscholar.org/2069/96c188d29db75a3f81a20aa67b76effe631f.pdf},
- volume = {5},
- year = {2010}
- }
- @article{Majewski2010,
- author = {Majewski, Ian J and Ritchie, Matthew E and Phipson, Belinda and Corbin, Jason and Pakusch, Miha and Ebert, Anja and Busslinger, Meinrad and Koseki, Haruhiko and Hu, Yifang and Smyth, Gordon K and Alexander, Warren S and Hilton, Douglas J and Blewitt, Marnie E},
- doi = {10.1182/blood-2009-12-260760},
- file = {:Users/ryan/Documents/Mendeley Desktop/Majewski et al. - 2010 - stem and progenitor cells Opposing roles of polycomb repressive complexes in hematopoietic stem and progenitor.pdf:pdf},
- journal = {Blood},
- number = {5},
- pages = {731--739},
- title = {{Opposing roles of polycomb repressive complexes in hematopoietic stem and progenitor cells}},
- url = {http://www.bloodjournal.org/content/bloodjournal/116/5/731.full.pdf},
- volume = {116},
- year = {2010}
- }
- @techreport{Bischl2012,
- abstract = {Empirical analysis of statistical algorithms often demands time-consuming experiments which are best performed on high performance computing clusters. We present two R packages which greatly simplify working in batch computing environments. The package BatchJobs implements the basic objects and procedures to control a batch cluster within R. It is structured around cluster versions of the well-known higher order functions Map, Reduce and Filter from functional programming. An important feature is that the state of computation is persistently available in a database. The user can query the status of jobs and then continue working with a desired subset. The second package, BatchExperiments, is tailored for the still very general scenario of analyzing arbitrary algorithms on problem instances. It extends BatchJobs by letting the user define an array of jobs of the kind ``apply algorithm A to problem instance P and store results''. It is possible to associate statistical designs with parameters of algorithms and problems and therefore to systematically study their influence on the results. In general our main contributions are: (a) Portability: Both packages use a clear and well-defined interface to the batch system which makes them applicable in most high-performance computing environments. (b) Reproducibility: Every computational part has an associated seed that the user can control to ensure reproducibility even when the underlying batch system changes. (c) Efficiency: Efficiently use batch computing clusters completely within R. (d) Abstraction and good software design: The code layers for algorithms, experiment definitions and execution are cleanly separated and enable the writing of readable and maintainable code.},
- author = {Bischl, Bernd and Lang, Michel},
- file = {:Users/ryan/Documents/Mendeley Desktop/Bischl, Lang - 2012 - Computing on high performance clusters with R Packages BatchJobs and BatchExperiments.pdf:pdf},
- institution = {Technische Universit{\"{a}}t Dortmund},
- title = {{Computing on high performance clusters with R: Packages BatchJobs and BatchExperiments}},
- year = {2012}
- }
- @article{Cole2016,
- abstract = {RNA-seq is now the technology of choice for genome-wide differential gene expression experiments, but it is not clear how many biological replicates are needed to ensure valid biological interpretation of the results or which statistical tools are best for analyzing the data. An RNA-seq experiment with 48 biological replicates in each of two conditions was performed to answer these questions and provide guidelines for experimental design. With three biological replicates, eight of the 11 tools evaluated found only 20{\%}-40{\%} of the significantly differentially expressed (SDE) genes identified with the full set of 42 clean replicates. This rises to {\textgreater}85{\%} for the subset of SDE genes changing in expression by more than fourfold. To achieve {\textgreater}85{\%} for all SDE genes regardless of fold change requires more than 20 biological replicates. The same eight tools successfully control their false discovery rate at ≲5{\%} for all numbers of replicates, while the remaining three tools fail to control their FDR adequately, particularly for low numbers of replicates. For future RNA-seq experiments, these results suggest that more than six biological replicates should be used, rising to more than 12 when it is important to identify SDE genes for all fold changes. If less than 12 replicates are used, a superior combination of true positive and false positive performances makes edgeR the leading tool. For higher replicate numbers, minimizing false positives is more important and DESeq marginally outperforms the other tools.},
- author = {Schurch, Nicholas J and Schofield, Piet{\'{a}} and Gierli{\'{n}}ski, Marek and Cole, Christian and Sherstnev, Alexander and Singh, Vijender and Wrobel, Nicola and Gharbi, Karim and Simpson, Gordon G and Owen-Hughes, Tom O M and Blaxter, Mark and Barton, Geoffrey J},
- doi = {10.1261/rna.053959.115},
- file = {:Users/ryan/Documents/Mendeley Desktop/Cole et al. - 2016 - How many biological replicates are needed in an RNA-seq experiment and which differential expression tool should yo.pdf:pdf},
- issn = {1469-9001},
- journal = {RNA (New York, N.Y.)},
- keywords = {RNA-seq,benchmarking,differential expression,experimental design,replication,statistical power,yeast},
- pages = {1--13},
- pmid = {27022035},
- title = {{How many biological replicates are needed in an RNA-seq experiment and which differential expression tool should you use?}},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/27022035},
- year = {2016}
- }
- @article{Turck2011,
- abstract = {Receiver operating characteristic (ROC) curves are useful tools to evaluate classifiers in biomedical and bioinformatics applications. However, conclusions are often reached through inconsistent use or insufficient statistical analysis. To support researchers in their ROC curves analysis we developed pROC, a package for R and S+ that contains a set of tools displaying, analyzing, smoothing and comparing ROC curves in a user-friendly, object-oriented and flexible interface. With data previously imported into the R or S+ environment, the pROC package builds ROC curves and includes functions for computing confidence intervals, statistical tests for comparing total or partial area under the curve or the operating points of different classifiers, and methods for smoothing ROC curves. Intermediary and final results are visualised in user-friendly interfaces. A case study based on published clinical and biomarker data shows how to perform a typical ROC analysis with pROC. pROC is a package for R and S+ specifically dedicated to ROC analysis. It proposes multiple statistical tests to compare ROC curves, and in particular partial areas under the curve, allowing proper ROC interpretation. pROC is available in two versions: in the R programming language or with a graphical user interface in the S+ statistical software. It is accessible at http://expasy.org/tools/pROC/ under the GNU General Public License. It is also distributed through the CRAN and CSAN public repositories, facilitating its installation.},
- author = {Robin, Xavier and Turck, Natacha and Hainard, Alexandre and Tiberti, Natalia and Lisacek, Fr{\'{e}}d{\'{e}}rique and Sanchez, Jean-Charles and M{\"{u}}ller, Markus},
- doi = {10.1186/1471-2105-12-77},
- file = {:Users/ryan/Documents/Mendeley Desktop/Turck et al. - 2011 - pROC an open-source package for R and S to analyze and compare ROC curves.pdf:pdf},
- issn = {1471-2105},
- journal = {BMC Bioinformatics},
- keywords = {Algorithms,Bioinformatics,Combinatorial Libraries,Computational Biology/Bioinformatics,Computer Appl. in Life Sciences,Microarrays},
- pages = {77},
- title = {{pROC: an open-source package for R and S+ to analyze and compare ROC curves}},
- url = {https://doi.org/10.1186/1471-2105-12-77},
- volume = {12},
- year = {2011}
- }
- @article{Gentleman2004,
- abstract = {The Bioconductor project is an initiative for the collaborative creation of extensible software for computational biology and bioinformatics. The goals of the project include: fostering collaborative development and widespread use of innovative software, reducing barriers to entry into interdisciplinary scientific research, and promoting the achievement of remote reproducibility of research results. We describe details of our aims and methods, identify current challenges, compare Bioconductor to other open bioinformatics projects, and provide working examples.},
- author = {Gentleman, Robert C and Carey, Vincent J. and Bates, Douglas M. and Bolstad, Ben and Dettling, Marcel and Dudoit, Sandrine and Ellis, Byron and Gautier, Laurent and Ge, Yongchao and Gentry, Jeff and Hornik, Kurt and Hothorn, Torsten and Huber, Wolfgang and Iacus, Stefano and Irizarry, Rafael and Leisch, Friedrich and Li, Cheng and Maechler, Martin and Rossini, Anthony J. and Sawitzki, Gunther and Smith, Colin and Smyth, Gordon and Tierney, Luke and Yang, Jean Y. H. and Zhang, Jianhua},
- doi = {10.1186/gb-2004-5-10-r80},
- file = {:Users/ryan/Documents/Mendeley Desktop/Gentleman et al. - 2004 - Bioconductor open software development for computational biology and bioinformatics.pdf:pdf},
- issn = {14656906},
- journal = {Genome biology},
- keywords = {Animal Genetics and Genomics,Bioinformatics,Evolutionary Biology,Human Genetics,Microbial Genetics and Genomics,Plant Genetics {\&} Genomics},
- number = {10},
- pages = {R80},
- title = {{Bioconductor: open software development for computational biology and bioinformatics}},
- url = {http://genomebiology.biomedcentral.com/articles/10.1186/gb-2004-5-10-r80},
- volume = {5},
- year = {2004}
- }
- @article{Wesolowski2013,
- abstract = {Transcriptome-based biosensors are expected to have a large impact on the future of biotechnology. However, a central aspect of transcriptomics is differential expression analysis, where, currently, deep RNA sequencing (RNA-seq) has the potential to replace the microarray as the standard assay for RNA quantification. Our contributions here to RNA-seq differential expression analysis are two-fold. First, given the high cost of an RNA-seq run, biological replicates are rare, and therefore, information sharing across genes to obtain variance estimates is crucial. To handle such information sharing in a rigorous manner, we propose an hierarchical, empirical Bayes approach (R-EBSeq) that combines the Cufflinks model for generating relative transcript abundance measurements, known as FPKM (fragments per kilobase of transcript length per million mapped reads) with the EBArrays framework, which was previously developed for empirical Bayes analysis of microarray data. A desirable feature of R-EBSeq is easy-to-implement analysis of more than pairwise comparisons, as we illustrate with experimental data. Secondly, we develop the standard RNA-seq test data set, on the level of reads, where 79 transcripts are artificially differentially expressed and, therefore, explicitly known. This test data set allows us to compare the performance, in terms of the true discovery rate, of R-EBSeq to three other widely used RNAseq data analysis packages: Cuffdiff, DEseq and BaySeq. Our analysis indicates that DESeq identifies the first half of the differentially expressed transcripts well, but then is outperformed by Cuffdiff and R-EBSeq. Cuffdiff and R-EBSeq are the two top performers. Thus, R-EBSeq offers good performance, while allowing flexible and rigorous comparison of multiple biological conditions.},
- author = {Wesolowski, Sergiusz and Birtwistle, Marc and Rempala, Grzegorz},
- doi = {10.3390/bios3030238},
- file = {:Users/ryan/Documents/Mendeley Desktop/Wesolowski, Birtwistle, Rempala - 2013 - A Comparison of Methods for RNA-Seq Differential Expression Analysis and a New Empirical Bayes.pdf:pdf},
- issn = {2079-6374},
- journal = {Biosensors},
- keywords = {next-generation sequencing,empirical Bayes,gene expression data},
- month = {jun},
- number = {3},
- pages = {238--258},
- title = {{A Comparison of Methods for RNA-Seq Differential Expression Analysis and a New Empirical Bayes Approach}},
- url = {http://www.mdpi.com/2079-6374/3/3/238/},
- volume = {3},
- year = {2013}
- }
- @article{Fan2005,
- abstract = {BACKGROUND: Various analytical methods exist that first quantify gene expression and then analyze differentially expressed genes from Affymetrix GeneChip gene expression analysis array data. These methods differ in the choice of probe measure (quantification of probe hybridization), summarizing multiple probe intensities into a gene expression value, and analysis of differential gene expression. Research papers that describe these methods focus on performance, and how their approaches differ from others. To better understand the common features and differences between various methods, and to evaluate their impact on the results of gene expression analysis, we describe a class of models, referred to as generalized probe models (GPMs), which encompass various currently available methods.
- RESULTS: Using an empirical dataset, we compared different formulations of GPMs, and GPMs with three other commonly used methods, i.e. MAS 5.0, dChip, and RMA. The comparison shows that, on a genome-wide scale, different methods yield similar results if the same probe measures are chosen.
- CONCLUSION: In this paper we present a general framework, i.e. GPMs, which encompasses various methods. GPMs permit the use of a wide range of probe measures and facilitate appropriate comparison between commonly used methods. We demonstrate that the dissimilar results stem primarily from different choice of probe measures, rather than other factors.},
- author = {Fan, Wenhong and Pritchard, Joel I and Olson, James M and Khalid, Najma and Zhao, Lue Ping},
- doi = {10.1186/1471-2164-6-16},
- file = {:Users/ryan/Documents/Mendeley Desktop/Fan et al. - 2005 - A class of models for analyzing GeneChip gene expression analysis array data.pdf:pdf},
- issn = {1471-2164},
- journal = {BMC genomics},
- keywords = {Algorithms,Cell Line, Tumor,DNA Primers,DNA Primers: chemistry,Data Interpretation, Statistical,Gene Expression Profiling,Genome,Humans,Models, Statistical,Nucleic Acid Hybridization,Oligonucleotide Array Sequence Analysis,Oligonucleotide Array Sequence Analysis: methods,Software},
- month = {feb},
- pages = {16},
- pmid = {15710039},
- title = {{A class of models for analyzing GeneChip gene expression analysis array data}},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=553974{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {6},
- year = {2005}
- }
- @article{Bourgon2010,
- abstract = {With high-dimensional data, variable-by-variable statistical testing is often used to select variables whose behavior differs across conditions. Such an approach requires adjustment for multiple testing, which can result in low statistical power. A two-stage approach that first filters variables by a criterion independent of the test statistic, and then only tests variables which pass the filter, can provide higher power. We show that use of some filter/test statistics pairs presented in the literature may, however, lead to loss of type I error control. We describe other pairs which avoid this problem. In an application to microarray data, we found that gene-by-gene filtering by overall variance followed by a t-test increased the number of discoveries by 50{\%}. We also show that this particular statistic pair induces a lower bound on fold-change among the set of discoveries. Independent filtering---using filter/test pairs that are independent under the null hypothesis but correlated under the alternative---is a general approach that can substantially increase the efficiency of experiments.},
- author = {Bourgon, Richard and Gentleman, Robert and Huber, Wolfgang},
- doi = {10.1073/pnas.0914005107},
- file = {:Users/ryan/Documents/Mendeley Desktop/Bourgon, Gentleman, Huber - 2010 - Independent filtering increases detection power for high-throughput experiments.pdf:pdf},
- issn = {1091-6490},
- journal = {Proceedings of the National Academy of Sciences of the United States of America},
- keywords = {Algorithms,Biometry,Biometry: methods,Computational Biology,Genetic,Models},
- month = {may},
- number = {21},
- pages = {9546--9551},
- pmid = {20460310},
- title = {{Independent filtering increases detection power for high-throughput experiments}},
- url = {http://www.pnas.org/content/107/21/9546.short},
- volume = {107},
- year = {2010}
- }
- @misc{Anders2013,
-   author = {Anders, Simon},
-   title  = {{HTSeq: Analysing high-throughput sequencing data with Python}},
-   year   = {2013},
-   url    = {http://www-huber.embl.de/users/anders/HTSeq/doc/index.html}
- }
- @article{Massa2010a,
- abstract = {BACKGROUND: Recently, a great effort in microarray data analysis is directed towards the study of the so-called gene sets. A gene set is defined by genes that are, somehow, functionally related. For example, genes appearing in a known biological pathway naturally define a gene set. The gene sets are usually identified from a priori biological knowledge. Nowadays, many bioinformatics resources store such kind of knowledge (see, for example, the Kyoto Encyclopedia of Genes and Genomes, among others). Although pathways maps carry important information about the structure of correlation among genes that should not be neglected, the currently available multivariate methods for gene set analysis do not fully exploit it.
- RESULTS: We propose a novel gene set analysis specifically designed for gene sets defined by pathways. Such analysis, based on graphical models, explicitly incorporates the dependence structure among genes highlighted by the topology of pathways. The analysis is designed to be used for overall surveillance of changes in a pathway in different experimental conditions. In fact, under different circumstances, not only the expression of the genes in a pathway, but also the strength of their relations may change. The methods resulting from the proposal allow both to test for variations in the strength of the links, and to properly account for heteroschedasticity in the usual tests for differential expression.
- CONCLUSIONS: The use of graphical models allows a deeper look at the components of the pathway that can be tested separately and compared marginally. In this way it is possible to test single components of the pathway and highlight only those involved in its deregulation.},
- author = {Massa, Maria Sofia and Chiogna, Monica and Romualdi, Chiara},
- doi = {10.1186/1752-0509-4-121},
- file = {:Users/ryan/Documents/Mendeley Desktop/Massa, Chiogna, Romualdi - 2010 - Gene set analysis exploiting the topology of a pathway.pdf:pdf},
- issn = {1752-0509},
- journal = {BMC systems biology},
- keywords = {Animals,Computational Biology,Computational Biology: methods,Computer Graphics,Gene Expression Profiling,Humans,Mice,Models, Genetic,Receptor, Epidermal Growth Factor,Receptor, Epidermal Growth Factor: genetics,Receptor, Epidermal Growth Factor: metabolism,Receptors, Antigen, B-Cell,Receptors, Antigen, B-Cell: genetics,Receptors, Antigen, B-Cell: metabolism,Signal Transduction},
- month = {sep},
- pages = {121},
- pmid = {20809931},
- title = {{Gene set analysis exploiting the topology of a pathway}},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=2945950{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {4},
- year = {2010}
- }
- @article{Wilkinson2006,
- abstract = {Studies suggest that surveillance or protocol biopsies that are performed during the first year after kidney transplantation may be clinically useful in identifying early acute rejection or chronic allograft nephropathy at a point when they may be amenable to treatment. Although the benefit of this approach has yet to be evaluated in large, multicenter, prospective trials, numerous studies suggest that implementation of protocol biopsies may improve long-term graft function. In particular, a number of reports suggest that detection of chronic allograft nephropathy in early protocol biopsies is predictive of subsequent graft function and loss and that early treatment may have a dramatic effect on the outcome of the graft. Protocol biopsies also have the potential to be of great value in high-risk patients, such as those with delayed graft function, by allowing for early intervention for acute rejection. Furthermore, the procedure seems to be relatively straightforward and safe. Nevertheless, paucity of data has meant that clear proof of a benefit of early treatment of subclinical rejection and chronic allograft nephropathy detected by protocol biopsy is lacking. Moreover, the optimal timing of protocol biopsies and reliable methods to quantify the histologic changes observed in biopsy specimens have yet to be determined. This review discusses the pros and cons of protocol biopsies and considers the place of this procedure in the routine treatment of kidney transplant patients.},
- author = {Wilkinson, Alan},
- doi = {10.2215/CJN.00350705},
- file = {:Users/ryan/Documents/Mendeley Desktop/Wilkinson - 2006 - Protocol transplant biopsies are they really needed.pdf:pdf},
- issn = {1555-905X},
- journal = {Clinical Journal of the American Society of Nephrology},
- number = {1},
- pages = {130--137},
- title = {{Protocol transplant biopsies: are they really needed?}},
- url = {https://cjasn.asnjournals.org/content/clinjasn/1/1/130.full.pdf},
- volume = {1},
- year = {2006}
- }
- @article{Salomon2002,
-   author    = {Salomon, Daniel R},
-   title     = {{Protocol biopsies should be part of the routine management of kidney transplant recipients}},
-   journal   = {American Journal of Kidney Diseases},
-   publisher = {Elsevier},
-   year      = {2002},
-   month     = {oct},
-   volume    = {40},
-   number    = {4},
-   pages     = {674--677},
-   issn      = {0272-6386},
-   doi       = {10.1053/ajkd.2002.36426},
-   url       = {https://doi.org/10.1053/ajkd.2002.36426},
-   annote    = {doi: 10.1053/ajkd.2002.36426}
- }
- @article{Wickersheim2013,
- abstract = {A large number of methods are available to deplete ribosomal RNA reads from high-throughput RNA sequencing experiments. Such methods are critical for sequencing Drosophila small RNAs between 20 and 30 nucleotides because size selection is not typically sufficient to exclude the highly abundant class of 30 nucleotide 2S rRNA. Here we demonstrate that pre-annealing terminator oligos complimentary to Drosophila 2S rRNA prior to 5' adapter ligation and reverse transcription efficiently depletes 2S rRNA sequences from the sequencing reaction in a simple and inexpensive way. This depletion is highly specific and is achieved with minimal perturbation of miRNA and piRNA profiles.},
- author = {Wickersheim, Michelle L and Blumenstiel, Justin P},
- doi = {10.2144/000114102},
- file = {:Users/ryan/Documents/Mendeley Desktop/Wickersheim, Blumenstiel - 2013 - Terminator oligo blocking efficiently eliminates rRNA from Drosophila small RNA sequencing libraries.pdf:pdf},
- issn = {1940-9818},
- journal = {BioTechniques},
- keywords = {2S rRNA,Drosophila,miRNA,piRNA,small RNA sequencing},
- month = {nov},
- number = {5},
- pages = {269--272},
- pmid = {24215643},
- title = {{Terminator oligo blocking efficiently eliminates rRNA from Drosophila small RNA sequencing libraries}},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/24215643},
- volume = {55},
- year = {2013}
- }
- @article{Lau2018,
- author = {Lau, Colleen M. and Adams, Nicholas M. and Geary, Clair D. and Weizman, Orr-El and Rapp, Moritz and Pritykin, Yuri and Leslie, Christina S. and Sun, Joseph C.},
- doi = {10.1038/s41590-018-0176-1},
- file = {:Users/ryan/Documents/Mendeley Desktop/Lau et al. - 2018 - Epigenetic control of innate and adaptive immune memory.pdf:pdf},
- issn = {1529-2908},
- journal = {Nature Immunology},
- month = {sep},
- number = {9},
- pages = {963--972},
- publisher = {Springer US},
- title = {{Epigenetic control of innate and adaptive immune memory}},
- url = {http://www.nature.com/articles/s41590-018-0176-1},
- volume = {19},
- year = {2018}
- }
- @article{Dillies2012,
- abstract = {During the last 3 years, a number of approaches for the normalization of RNA sequencing data have emerged in the literature, differing both in the type of bias adjustment and in the statistical strategy adopted. However, as data continue to accumulate, there has been no clear consensus on the appropriate normalization method to be used or the impact of a chosen method on the downstream analysis. In this work, we focus on a comprehensive comparison of seven recently proposed normalization methods for the differential analysis of RNA-seq data, with an emphasis on the use of varied real and simulated datasets involving different species and experimental designs to represent data characteristics commonly observed in practice. Based on this comparison study, we propose practical recommendations on the appropriate normalization method to be used and its impact on the differential analysis of RNA-seq data.},
- annote = {From Duplicate 1 (A comprehensive evaluation of normalization methods for Illumina high-throughput RNA sequencing data analysis. - Dillies, Marie-Agn{\`{e}}s; Rau, Andrea; Aubert, Julie; Hennequet-Antier, Christelle; Jeanmougin, Marine; Servant, Nicolas; Keime, C{\'{e}}line; Marot, Guillemette; Castel, David; Estelle, Jordi; Guernec, Gregory; Jagla, Bernd; Jouneau, Luc; Lalo{\"{e}}, Denis; Le Gall, Caroline; Scha{\"{e}}ffer, Brigitte; Le Crom, St{\'{e}}phane; Guedj, Micka{\"{e}}l; Jaffr{\'{e}}zic, Florence)
- From Duplicate 2 (
- A comprehensive evaluation of normalization methods for Illumina high-throughput RNA sequencing data analysis.
- - Dillies, Marie-Agn{\`{e}}s; Rau, Andrea; Aubert, Julie; Hennequet-Antier, Christelle; Jeanmougin, Marine; Servant, Nicolas; Keime, C{\'{e}}line; Marot, Guillemette; Castel, David; Estelle, Jordi; Guernec, Gregory; Jagla, Bernd; Jouneau, Luc; Lalo{\"{e}}, Denis; Le Gall, Caroline; Scha{\"{e}}ffer, Brigitte; Le Crom, St{\'{e}}phane; Guedj, Micka{\"{e}}l; Jaffr{\'{e}}zic, Florence )
- },
- author = {Dillies, Marie-Agn{\`{e}}s and Rau, Andrea and Aubert, Julie and Hennequet-Antier, Christelle and Jeanmougin, Marine and Servant, Nicolas and Keime, C{\'{e}}line and Marot, Guillemette and Castel, David and Estelle, Jordi and Guernec, Gregory and Jagla, Bernd and Jouneau, Luc and Lalo{\"{e}}, Denis and {Le Gall}, Caroline and Scha{\"{e}}ffer, Brigitte and {Le Crom}, St{\'{e}}phane and Guedj, Micka{\"{e}}l and Jaffr{\'{e}}zic, Florence},
- doi = {10.1093/bib/bbs046},
- file = {:Users/ryan/Documents/Mendeley Desktop/Dillies et al. - 2012 - A comprehensive evaluation of normalization methods for Illumina high-throughput RNA sequencing data analysis.pdf:pdf;:Users/ryan/Documents/Mendeley Desktop/Dillies et al. - 2012 - A comprehensive evaluation of normalization methods for Illumina high-throughput RNA sequencing data analysis(2).pdf:pdf;:Users/ryan/Documents/Mendeley Desktop/Dillies et al. - 2012 - A comprehensive evaluation of normalization methods for Illumina high-throughput RNA sequencing data analysis.pdf:pdf},
- isbn = {1477-4054 (Electronic), 1467-5463 (Linking)},
- issn = {1477-4054},
- journal = {Briefings in bioinformatics},
- keywords = {Differential analysis,High-throughput sequencing,Normalization,RNA-seq},
- month = {sep},
- number = {6},
- pages = {671--683},
- pmid = {22988256},
- title = {{A comprehensive evaluation of normalization methods for Illumina high-throughput RNA sequencing data analysis.}},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/22988256 http://bib.oxfordjournals.org/content/14/6/671.full.pdf},
- volume = {14},
- year = {2012}
- }
- @article{Etz2015,
- author = {Etz, Alexander},
- file = {:Users/ryan/Documents/Mendeley Desktop/Etz - 2015 - Using Bayes Factors to Get the Most out of Linear Regression A Practical Guide Using R.pdf:pdf},
- journal = {The Winnower},
- keywords = {bayes factor,bayesian,linear regression,model selection},
- number = {8},
- title = {{Using Bayes Factors to Get the Most out of Linear Regression: A Practical Guide Using R}},
- url = {https://winnower-production.s3.amazonaws.com/papers/278/v4/pdf/278-using-bayes-factors-to-get-the-most-out-of-linear-regression-a-practical-guide-using-r.pdf},
- volume = {March},
- year = {2015}
- }
- @article{Blanco2007,
- abstract = {This unit describes the usage of geneid, an efficient gene-finding program that allows for the analysis of large genomic sequences, including whole mammalian chromosomes. These sequences can be partially annotated, and geneid can be used to refine this initial annotation. Training geneid is relatively easy, and parameter configurations exist for a number of eukaryotic species. Geneid produces output in a variety of standard formats. The results, thus, can be processed by a variety of software tools, including visualization programs. Geneid software is in the public domain, and it is undergoing constant development. It is easy to install and use. Exhaustive benchmark evaluations show that geneid compares favorably with other existing gene finding tools.},
- author = {Blanco, Enrique and Parra, Gen{\'{i}}s and Guig{\'{o}}, Roderic},
- doi = {10.1002/0471250953.bi0403s18},
- file = {:Users/ryan/Documents/Mendeley Desktop/Blanco, Parra, Guig{\'{o}} - 2007 - Using geneid to identify genes.pdf:pdf},
- issn = {1934-340X},
- journal = {Current protocols in bioinformatics},
- keywords = {Algorithms,Base Sequence,Chromosome Mapping,Chromosome Mapping: methods,DNA,DNA: methods,Genes,Genes: genetics,Molecular Sequence Data,Sequence Alignment,Sequence Alignment: methods,Sequence Analysis},
- month = {jun},
- number = {1},
- pages = {Unit 4.3},
- pmid = {18428791},
- title = {{Using geneid to identify genes.}},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/18428791},
- volume = {Chapter 4},
- year = {2007}
- }
- @article{Hansen2011,
- abstract = {The ability to measure gene expression on a genome-wide scale is one of the most promising accomplishments in molecular biology. Microarrays, the technology that first permitted this, were riddled with problems due to unwanted sources of variability. Many of these problems are now mitigated, after a decade's worth of statistical methodology development. The recently developed RNA sequencing (RNA-seq) technology has generated much excitement in part due to claims of reduced variability in comparison to microarrays. However, we show that RNA-seq data demonstrate unwanted and obscuring variability similar to what was first observed in microarrays. In particular, we find guanine-cytosine content (GC-content) has a strong sample-specific effect on gene expression measurements that, if left uncorrected, leads to false positives in downstream results. We also report on commonly observed data distortions that demonstrate the need for data normalization. Here, we describe a statistical methodology that improves precision by 42{\%} without loss of accuracy. Our resulting conditional quantile normalization algorithm combines robust generalized regression to remove systematic bias introduced by deterministic features such as GC-content and quantile normalization to correct for global distortions.},
- author = {Hansen, Kasper D and Irizarry, Rafael A and Wu, Zhijin},
- doi = {10.1093/biostatistics/kxr054},
- file = {:Users/ryan/Documents/Mendeley Desktop/Hansen, Irizarry, Wu - 2012 - Removing technical variability in RNA-seq data using conditional quantile normalization.pdf:pdf},
- issn = {1468-4357},
- journal = {Biostatistics},
- keywords = {Algorithms,Analysis of Variance,Base Composition,Biostatistics,Databases,Gene Expression Profiling,Gene Expression Profiling: statistics {\&} numerical,High-Throughput Nucleotide Sequencing,High-Throughput Nucleotide Sequencing: statistics,Humans,Nucleic Acid,Nucleic Acid: statistics {\&} numerical data,Oligonucleotide Array Sequence Analysis,Oligonucleotide Array Sequence Analysis: statistic,RNA,RNA: statistics {\&} numerical data,Sequence Analysis},
- month = {apr},
- number = {2},
- pages = {204--16},
- pmid = {22285995},
- title = {{Removing technical variability in RNA-seq data using conditional quantile normalization.}},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=3297825{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {13},
- year = {2012}
- }
- @article{Hachiya2009a,
- abstract = {MOTIVATION: The accurate detection of orthologous segments (also referred to as syntenic segments) plays a key role in comparative genomics, as it is useful for inferring genome rearrangement scenarios and computing whole-genome alignments. Although a number of algorithms for detecting orthologous segments have been proposed, none of them contain a framework for optimizing their parameter values. METHODS: In the present study, we propose an algorithm, named OSfinder (Orthologous Segment finder), which uses a novel scoring scheme based on stochastic models. OSfinder takes as input the positions of short homologous regions (also referred to as anchors) and explicitly discriminates orthologous anchors from non-orthologous anchors by using Markov chain models which represent respective geometric distributions of lengths of orthologous and non-orthologous anchors. Such stochastic modeling makes it possible to optimize parameter values by maximizing the likelihood of the input dataset, and to automate the setting of the optimal parameter values. RESULTS: We validated the accuracies of orthology-mapping algorithms on the basis of their consistency with the orthology annotation of genes. Our evaluation tests using mammalian and bacterial genomes demonstrated that OSfinder shows higher accuracy than previous algorithms. AVAILABILITY: The OSfinder software was implemented as a C++ program. The software is freely available at http://osfinder.dna.bio.keio.ac.jp under the GNU General Public License. SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.},
- author = {Hachiya, Tsuyoshi and Osana, Yasunori and Popendorf, Kris and Sakakibara, Yasubumi},
- doi = {10.1093/bioinformatics/btp070},
- file = {:Users/ryan/Documents/Mendeley Desktop/Hachiya et al. - 2009 - Accurate identification of orthologous segments among multiple genomes(2).pdf:pdf;:Users/ryan/Documents/Mendeley Desktop/Hachiya et al. - 2009 - Accurate identification of orthologous segments among multiple genomes.pdf:pdf},
- isbn = {1367-4811 (Electronic), 1367-4803 (Linking)},
- issn = {13674803},
- journal = {Bioinformatics},
- number = {7},
- pages = {853--860},
- pmid = {19188192},
- title = {{Accurate identification of orthologous segments among multiple genomes}},
- url = {http://bioinformatics.oxfordjournals.org/content/25/7/853.short},
- volume = {25},
- year = {2009}
- }
- @article{Houseman2014,
- abstract = {MOTIVATION: Recently there has been increasing interest in the effects of cell mixture on the measurement of DNA methylation, specifically the extent to which small perturbations in cell mixture proportions can register as changes in DNA methylation. A recently published set of statistical methods exploits this association to infer changes in cell mixture proportions, and these methods are presently being applied to adjust for cell mixture effect in the context of epigenome-wide association studies. However, these adjustments require the existence of reference datasets, which may be laborious or expensive to collect. For some tissues such as placenta, saliva, adipose or tumor tissue, the relevant underlying cell types may not be known. RESULTS: We propose a method for conducting epigenome-wide association studies analysis when a reference dataset is unavailable, including a bootstrap method for estimating standard errors. We demonstrate via simulation study and several real data analyses that our proposed method can perform as well as or better than methods that make explicit use of reference datasets. In particular, it may adjust for detailed cell type differences that may be unavailable even in existing reference datasets. AVAILABILITY AND IMPLEMENTATION: Software is available in the R package RefFreeEWAS. Data for three of four examples were obtained from Gene Expression Omnibus (GEO), accession numbers GSE37008, GSE42861 and GSE30601, while reference data were obtained from GEO accession number GSE39981. CONTACT: andres.houseman@oregonstate.edu SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.},
- author = {Houseman, Eugene Andres and Molitor, John and Marsit, Carmen J.},
- doi = {10.1093/bioinformatics/btu029},
- file = {:Users/ryan/Documents/Mendeley Desktop/Houseman, Molitor, Marsit - 2014 - Reference-free cell mixture adjustments in analysis of DNA methylation data.pdf:pdf;:Users/ryan/Documents/Mendeley Desktop/Houseman, Molitor, Marsit - 2014 - Reference-free cell mixture adjustments in analysis of DNA methylation data(2).pdf:pdf},
- isbn = {1367-4811 (Electronic), 1367-4803 (Linking)},
- issn = {1367-4803},
- journal = {Bioinformatics},
- number = {10},
- pages = {1431--1439},
- pmid = {24451622},
- title = {{Reference-free cell mixture adjustments in analysis of DNA methylation data}},
- url = {http://bioinformatics.oxfordjournals.org/cgi/doi/10.1093/bioinformatics/btu029 http://bioinformatics.oxfordjournals.org/content/suppl/2014/01/18/btu029.DC1/RefFree-Houseman-Bioinformatics-R1-Supplement.pdf},
- volume = {30},
- year = {2014}
- }
- @article{Wang2014,
- abstract = {Histone modification (HM) patterns are widely applied to identify transcription factor binding regions (TFBRs). However, how frequently the TFBRs overlap with genomic regions enriched with certain types of HMs and which HM marker is more effective to pinpoint the TFBRs have not been systematically investigated. To address these problems, we studied 149 transcription factor (TF) ChIP-seq datasets and 33 HM ChIP-seq datasets in three cell lines. We found that on average about 90{\%} of the TFBRs overlap with the H3K4me2-enriched regions. Moreover, the H3K4me2-enriched regions with stronger signals of H3K4me2 enrichment more likely overlap with the TFBRs than those with weaker signals. In addition, we showed that the H3K4me2-enriched regions together with the H3K27ac-enriched regions can greatly reduce false positive predictions of the TFBRs. Our study sheds light on the comprehensive discovery of the TFBRs using the H3K4me2-enriched regions, especially when no good antibody to a TF exists.},
- author = {Wang, Ying and Li, Xiaoman and Hu, Haiyan},
- doi = {10.1016/j.ygeno.2014.02.002},
- file = {:Users/ryan/Documents/Mendeley Desktop/Wang, Li, Hu - 2014 - H3K4me2 reliably defines transcription factor binding regions in different cells.pdf:pdf},
- issn = {1089-8646},
- journal = {Genomics},
- keywords = {ChIP-seq,H3K4me2,Histone modification,Transcription factor binding regions},
- month = {feb},
- pmid = {24530516},
- publisher = {Elsevier B.V.},
- title = {{H3K4me2 reliably defines transcription factor binding regions in different cells.}},
- url = {http://dx.doi.org/10.1016/j.ygeno.2014.02.002 http://www.ncbi.nlm.nih.gov/pubmed/24530516},
- year = {2014}
- }
- @article{Komori2011,
- abstract = {Cytosine methylation of DNA CpG dinucleotides in gene promoters is an epigenetic modification that regulates gene transcription. While many methods exist to interrogate methylation states, few current methods offer large-scale, targeted, single CpG resolution. We report an approach combining bisulfite treatment followed by microdroplet PCR with next-generation sequencing to assay the methylation state of 50 genes in the regions 1 kb upstream of and downstream from their transcription start sites. This method yielded 96{\%} coverage of the targeted CpGs and demonstrated high correlation between CpG island (CGI) DNA methylation and transcriptional regulation. The method was scaled to interrogate the methylation status of 77,674 CpGs in the promoter regions of 2100 genes in primary CD4 T cells. The 2100 gene library yielded 97{\%} coverage of all targeted CpGs and 99{\%} of the target amplicons.},
- author = {Komori, H Kiyomi and LaMere, Sarah A and Torkamani, Ali and Hart, G Traver and Kotsopoulos, Steve and Warner, Jason and Samuels, Michael L and Olson, Jeff and Head, Steven R and Ordoukhanian, Phillip and Lee, Pauline L and Link, Darren R and Salomon, Daniel R},
- doi = {10.1101/gr.116863.110},
- file = {:Users/ryan/Documents/Mendeley Desktop/Komori et al. - 2011 - Application of microdroplet PCR for large-scale targeted bisulfite sequencing.pdf:pdf},
- issn = {1549-5469},
- journal = {Genome research},
- keywords = {Base Sequence,CpG Islands,DNA,DNA Methylation,DNA Primers,DNA Primers: chemistry,DNA: chemistry,DNA: genetics,Epigenesis, Genetic,High-Throughput Nucleotide Sequencing,High-Throughput Nucleotide Sequencing: methods,Humans,Jurkat Cells,Microchemistry,Microchemistry: methods,Polymerase Chain Reaction,Polymerase Chain Reaction: methods,Promoter Regions, Genetic,Sequence Analysis, DNA,Sequence Analysis, DNA: methods,Sulfites,Sulfites: chemistry},
- month = {oct},
- number = {10},
- pages = {1738--45},
- pmid = {21757609},
- title = {{Application of microdroplet PCR for large-scale targeted bisulfite sequencing.}},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=3202290{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {21},
- year = {2011}
- }
- @article{Joyce2012,
- author = {Joyce, Gerald F},
- doi = {10.1126/science.1221724},
- file = {:Users/ryan/Documents/Mendeley Desktop/Joyce - 2012 - Evolution. Toward an alternative biology.pdf:pdf},
- issn = {1095-9203},
- journal = {Science},
- keywords = {Aptamers, Nucleotide,Aptamers, Nucleotide: chemistry,Aptamers, Nucleotide: metabolism,Evolution, Molecular,Molecular Mimicry,Nucleic Acids,Nucleic Acids: chemistry,Polymers,Polymers: chemistry},
- month = {apr},
- number = {6079},
- pages = {307--8},
- pmid = {22517850},
- title = {{Evolution. Toward an alternative biology.}},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/22517850},
- volume = {336},
- year = {2012}
- }
- @article{Figueroa2012,
- abstract = {BACKGROUND: Supervised learning methods need annotated data in order to generate efficient models. Annotated data, however, is a relatively scarce resource and can be expensive to obtain. For both passive and active learning methods, there is a need to estimate the size of the annotated sample required to reach a performance target.
- METHODS: We designed and implemented a method that fits an inverse power law model to points of a given learning curve created using a small annotated training set. Fitting is carried out using nonlinear weighted least squares optimization. The fitted model is then used to predict the classifier's performance and confidence interval for larger sample sizes. For evaluation, the nonlinear weighted curve fitting method was applied to a set of learning curves generated using clinical text and waveform classification tasks with active and passive sampling methods, and predictions were validated using standard goodness of fit measures. As control we used an un-weighted fitting method.
- RESULTS: A total of 568 models were fitted and the model predictions were compared with the observed performances. Depending on the data set and sampling method, it took between 80 to 560 annotated samples to achieve mean average and root mean squared error below 0.01. Results also show that our weighted fitting method outperformed the baseline un-weighted method (p {\textless} 0.05).
- CONCLUSIONS: This paper describes a simple and effective sample size prediction algorithm that conducts weighted fitting of learning curves. The algorithm outperformed an un-weighted algorithm described in previous literature. It can help researchers determine annotation sample size for supervised machine learning.},
- author = {Figueroa, Rosa L and Zeng-Treitler, Qing and Kandula, Sasikiran and Ngo, Long H},
- doi = {10.1186/1472-6947-12-8},
- file = {:Users/ryan/Documents/Mendeley Desktop/Figueroa et al. - 2012 - Predicting sample size required for classification performance.pdf:pdf},
- issn = {1472-6947},
- journal = {BMC medical informatics and decision making},
- keywords = {Algorithms,Data Interpretation, Statistical,Diagnosis, Computer-Assisted,Humans,Learning Curve,Models, Statistical,Nonlinear Dynamics,Pattern Recognition, Automated,Predictive Value of Tests,Probability Learning,Problem-Based Learning,Problem-Based Learning: methods,Reproducibility of Results,Sample Size,Stochastic Processes},
- month = {jan},
- number = {1},
- pages = {8},
- pmid = {22336388},
- publisher = {BioMed Central Ltd},
- title = {{Predicting sample size required for classification performance.}},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=3307431{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {12},
- year = {2012}
- }
- @article{Wickham2011,
- abstract = {We propose a new framework for visualising tables of counts, proportions and probabilities. We call our framework product plots, alluding to the computation of area as a product of height and width, and the statistical concept of generating a joint distribution from the product of conditional and marginal distributions. The framework, with extensions, is sufficient to encompass over 20 visualisations previously described in fields of statistical graphics and infovis, including bar charts, mosaic plots, treemaps, equal area plots and fluctuation diagrams.},
- author = {Wickham, Hadley and Hofmann, Heike},
- doi = {10.1109/TVCG.2011.227},
- file = {:Users/ryan/Documents/Mendeley Desktop/Wickham, Hofmann - 2011 - Product plots.pdf:pdf},
- issn = {1941-0506},
- journal = {IEEE transactions on visualization and computer graphics},
- month = {dec},
- number = {12},
- pages = {2223--30},
- pmid = {22034341},
- title = {{Product plots.}},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/22034341},
- volume = {17},
- year = {2011}
- }
- @article{Fu2011,
- abstract = {We implement a unique strategy for single molecule counting termed stochastic labeling, where random attachment of a diverse set of labels converts a population of identical DNA molecules into a population of distinct DNA molecules suitable for threshold detection. The conceptual framework for stochastic labeling is developed and experimentally demonstrated by determining the absolute and relative number of selected genes after stochastically labeling approximately 360,000 different fragments of the human genome. The approach does not require the physical separation of molecules and takes advantage of highly parallel methods such as microarray and sequencing technologies to simultaneously count absolute numbers of multiple targets. Stochastic labeling should be particularly useful for determining the absolute numbers of RNA or DNA molecules in single cells.},
- author = {Fu, Glenn K and Hu, Jing and Wang, Pei-hua and Fodor, Stephen P A},
- doi = {10.1073/pnas.1017621108},
- file = {:Users/ryan/Documents/Mendeley Desktop/Fu et al. - 2011 - Counting individual DNA molecules by the stochastic attachment of diverse labels.pdf:pdf},
- issn = {1091-6490},
- journal = {Proceedings of the National Academy of Sciences of the United States of America},
- keywords = {B-Lymphocytes,B-Lymphocytes: cytology,DNA,DNA: analysis,Down Syndrome,Down Syndrome: genetics,Fluorescence,Fluorescence: methods,Genome,Human,Humans,Male,Microscopy,Models,Oligonucleotide Array Sequence Analysis,Poisson Distribution,RNA,RNA: analysis,Regression Analysis,Reproducibility of Results,Sequence Analysis,Statistical,Stochastic Processes},
- month = {may},
- number = {22},
- pages = {9026--31},
- pmid = {21562209},
- title = {{Counting individual DNA molecules by the stochastic attachment of diverse labels.}},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=3107322{\&}tool=pmcentrez{\&}rendertype=abstract http://www.pnas.org/content/108/22/9026.long},
- volume = {108},
- year = {2011}
- }
- @article{Pounds2006,
- abstract = {Motivation: Presently available methods that use p-values to estimate or control the false discovery rate (FDR) implicitly assume that p-values are continuously distributed and based on two-sided tests. Therefore, it is difficult to reliably estimate the FDR when p-values are discrete or based on one-sided tests. Results: A simple and robust method to estimate the FDR is proposed. The proposed method does not rely on implicit assumptions that tests are two-sided or yield continuously distributed p-values. The proposed method is proven to be conservative and have desirable large-sample properties. In addition, the proposed method was among the best performers across a series of `real data simulations' comparing the performance of five currently available methods. Availability: Libraries of S-plus and R routines to implement the method are freely available from www.stjuderesearch.org/depts/biostats Contact: stanley.pounds@stjude.org Supplementary information: Supplementary data are available at Bioinformatics online.},
- author = {Pounds, Stan and Cheng, Cheng},
- doi = {10.1093/bioinformatics/btl328},
- file = {:Users/ryan/Documents/Mendeley Desktop/Pounds, Cheng - 2006 - Robust estimation of the false discovery rate.pdf:pdf;:Users/ryan/Documents/Mendeley Desktop/Pounds, Cheng - 2006 - Robust estimation of the false discovery rate.r:r;:Users/ryan/Documents/Mendeley Desktop/Pounds, Cheng - 2006 - Robust estimation of the false discovery rate(2).pdf:pdf},
- isbn = {1367-4811 (Electronic), 1367-4803 (Linking)},
- issn = {13674803},
- journal = {Bioinformatics},
- number = {16},
- pages = {1979--1987},
- pmid = {16777905},
- title = {{Robust estimation of the false discovery rate}},
- url = {http://www.stjuderesearch.org/depts/biostats/documents/robust-fdr.R http://bioinformatics.oxfordjournals.org/content/22/16/1979.full.pdf},
- volume = {22},
- year = {2006}
- }
- @article{Lun2015a,
- abstract = {{\textcopyright} 2016 The Author(s) 2015. Published by Oxford University Press on behalf of Nucleic Acids Research. Chromatin immunoprecipitation with massively parallel sequencing (ChIP-seq) is widely used to identify binding sites for a target protein in the genome. An important scientific application is to identify changes in protein binding between different treatment conditions, i.e. to detect differential binding. This can reveal potential mechanisms through which changes in binding may contribute to the treatment effect. The csaw package provides a framework for the de novo detection of differentially bound genomic regions. It uses a window-based strategy to summarize read counts across the genome. It exploits existing statistical software to test for significant differences in each window. Finally, it clusters windows into regions for output and controls the false discovery rate properly over all detected regions. The csaw package can handle arbitrarily complex experimental designs involving biological replicates. It can be applied to both transcription factor and histone mark datasets, and, more generally, to any type of sequencing data measuring genomic coverage. csaw performs favorably against existing methods for de novo DB analyses on both simulated and real data. csaw is implemented as a R software package and is freely available from the open-source Bioconductor project.},
- author = {Lun, Aaron T.L. and Smyth, Gordon K.},
- doi = {10.1093/nar/gkv1191},
- file = {:Users/ryan/Documents/Mendeley Desktop/Lun, Smyth - 2015 - Csaw A Bioconductor package for differential binding analysis of ChIP-seq data using sliding windows.pdf:pdf},
- issn = {13624962},
- journal = {Nucleic Acids Research},
- number = {5},
- pages = {e45},
- title = {{Csaw: A Bioconductor package for differential binding analysis of ChIP-seq data using sliding windows}},
- url = {https://doi.org/10.1093/nar/gkv1191},
- volume = {44},
- year = {2015}
- }
- @article{Storey2007,
- author = {Storey, John D.},
- doi = {10.1111/j.1467-9868.2007.005592.x},
- file = {:Users/ryan/Documents/Mendeley Desktop/Storey - 2007 - The optimal discovery procedure a new approach to simultaneous significance testing.pdf:pdf},
- issn = {1369-7412},
- journal = {Journal of the Royal Statistical Society: Series B (Statistical Methodology)},
- keywords = {classification,false discovery rate,multiple-hypothesis testing,optimal discovery procedure,q-value,single-thresholding procedure},
- month = {jun},
- number = {3},
- pages = {347--368},
- title = {{The optimal discovery procedure: a new approach to simultaneous significance testing}},
- url = {http://doi.wiley.com/10.1111/j.1467-9868.2007.005592.x},
- volume = {69},
- year = {2007}
- }
- @article{LeBlanc2003,
- abstract = {Mesenchymal stem cells (MSC) derived from adult BM or fetal liver form several mesenchymal tissues after appropriate stimulation. Reports indicate that MSC have unique immunologic properties, making them ideal for cellular therapy. MSC are not immunogenic, they do not stimulate alloreactivity, and they escape lysis by cytotoxic T-cells and natural killer (NK)-cells. Thus, MSC may be transplantable between HLA-mismatched individuals without the need for host immunosuppression. Furthermore, adult MSC appear to be immunosuppressive as they reduce alloreactivity and the formation of cytotoxic lymphocytes in vitro. In vivo, adult MSC prolong the time to rejection of mis-matched skin grafts in baboons. The immunosuppressive properties of first trimester fetal MSC are less pronounced, but inducible with IFN$\gamma$. These findings imply a potential role for MSC, not only in the repair of damaged tissues, but also in the manipulation of immune responses.},
- annote = {doi: 10.1080/14653240310003611},
- author = {{Le Blanc}, K},
- doi = {10.1080/14653240310003611},
- issn = {1465-3249},
- journal = {Cytotherapy},
- month = {dec},
- number = {6},
- pages = {485--489},
- publisher = {Elsevier},
- title = {{Immunomodulatory effects of fetal and adult mesenchymal stem cells}},
- url = {https://doi.org/10.1080/14653240310003611},
- volume = {5},
- year = {2003}
- }
- @article{Kharchenko2014,
- abstract = {Single-cell data provide a means to dissect the composition of complex tissues and specialized cellular environments. However, the analysis of such measurements is complicated by high levels of technical noise and intrinsic biological variability. We describe a probabilistic model of expression-magnitude distortions typical of single-cell RNA-sequencing measurements, which enables detection of differential expression signatures and identification of subpopulations of cells in a way that is more tolerant of noise.},
- author = {Kharchenko, Peter V and Silberstein, Lev and Scadden, David T},
- doi = {10.1038/nmeth.2967},
- file = {:Users/ryan/Documents/Mendeley Desktop/Kharchenko, Silberstein, Scadden - 2014 - Bayesian approach to single-cell differential expression analysis.pdf:pdf},
- isbn = {1548-7091},
- issn = {1548-7105},
- journal = {Nature methods},
- number = {7},
- pages = {740--2},
- pmid = {24836921},
- title = {{Bayesian approach to single-cell differential expression analysis.}},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/24836921},
- volume = {11},
- year = {2014}
- }
- @article{Bartholome2009a,
- abstract = {In order to handle and interpret the vast amounts of data produced by microarray experiments, the analysis of sets of genes with a common biological functionality has been shown to be advantageous compared to single gene analyses. Some statistical methods have been proposed to analyse the differential gene expression of gene sets in microarray experiments. However, most of these methods either require threshhold values to be chosen for the analysis, or they need some reference set for the determination of significance. We present a method that estimates the number of differentially expressed genes in a gene set without requiring a threshold value for significance of genes. The method is self-contained (i.e., it does not require a reference set for comparison). In contrast to other methods which are focused on significance, our approach emphasizes the relevance of the regulation of gene sets. The presented method measures the degree of regulation of a gene set and is a useful tool to compare the induction of different gene sets and place the results of microarray experiments into the biological context. An R-package is available.},
- author = {Bartholom{\'{e}}, Kilian and Kreutz, Clemens and Timmer, Jens},
- doi = {10.1089/cmb.2008.0226},
- file = {:Users/ryan/Documents/Mendeley Desktop/Bartholom{\'{e}}, Kreutz, Timmer - 2009 - Estimation of gene induction enables a relevance-based ranking of gene sets.pdf:pdf},
- issn = {1557-8666},
- journal = {Journal of Computational Biology},
- keywords = {Gene Expression Profiling,Gene Expression Profiling: methods,Gene Expression Regulation,Models, Biological,Oligonucleotide Array Sequence Analysis,Oligonucleotide Array Sequence Analysis: methods},
- month = jul,
- number = {7},
- pages = {959--967},
- pmid = {19580524},
- title = {{Estimation of gene induction enables a relevance-based ranking of gene sets.}},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/19580524},
- volume = {16},
- year = {2009}
- }
- @article{Dunkler2010a,
- abstract = {MOTIVATION: Univariate Cox regression (COX) is often used to select genes possibly linked to survival. With non-proportional hazards (NPH), COX could lead to under- or over-estimation of effects. The effect size measure c=P(T(1){\textless}T(0)), i.e. the probability that a person randomly chosen from group G(1) dies earlier than a person from G(0), is independent of the proportional hazards (PH) assumption. Here we consider its generalization to continuous data c' and investigate the suitability of c' for gene selection.
- RESULTS: Under PH, c' is most efficiently estimated by COX. Under NPH, c' can be obtained by weighted Cox regression (WHE) or a novel method, concordance regression (CON). The least biased and most stable estimates were obtained by CON. We propose to use c' as summary measure of effect size to rank genes irrespective of different types of NPH and censoring patterns.
- AVAILABILITY: WHE and CON are available as R packages.
- CONTACT: georg.heinze@meduniwien.ac.at
- SUPPLEMENTARY INFORMATION: Supplementary Data are available at Bioinformatics online.},
- author = {Dunkler, Daniela and Schemper, Michael and Heinze, Georg},
- doi = {10.1093/bioinformatics/btq035},
- file = {:Users/ryan/Documents/Mendeley Desktop/Dunkler, Schemper, Heinze - 2010 - Gene selection in microarray survival studies under possibly non-proportional hazards.pdf:pdf},
- issn = {1367-4811},
- journal = {Bioinformatics},
- keywords = {Gene Expression Profiling,Gene Expression Profiling: methods,Oligonucleotide Array Sequence Analysis,Oligonucleotide Array Sequence Analysis: methods,Regression Analysis,Transcription Factors,Transcription Factors: genetics},
- month = mar,
- number = {6},
- pages = {784--790},
- pmid = {20118118},
- title = {{Gene selection in microarray survival studies under possibly non-proportional hazards.}},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/20118118},
- volume = {26},
- year = {2010}
- }
- @article{Tekes2011,
- abstract = {The RNA synthesis machinery of vesicular stomatitis virus (VSV) comprises the genomic RNA encapsidated by the viral nucleocapsid protein (N) and associated with the RNA dependent RNA polymerase, the viral components of which are a large protein (L) and an accessory phosphoprotein (P). The 241 kDa L protein contains all the enzymatic activities necessary for synthesis of the viral mRNAs, including capping, cap methylation and polyadenylation. Those RNA processing reactions are intimately coordinated with nucleotide polymerization such that failure to cap results in termination of transcription and failure to methylate can result in hyper polyadenylation. The mRNA processing reactions thus serve as a critical check point in viral RNA synthesis which may control the synthesis of incorrectly modified RNAs. Here, we report the length at which viral transcripts first gain access to the capping machinery during synthesis. By reconstitution of transcription in vitro with highly purified recombinant polymerase and engineered templates in which we omitted sites for incorporation of UTP, we found that transcripts that were 30-nucleotides in length were uncapped, whereas those that were 31-nucleotides in length contained a cap structure. The minimal RNA length required for mRNA cap addition was also sufficient for methylation since the 31-nucleotide long transcripts were methylated at both ribose-2'-O and guanine-N-7 positions. This work provides insights into the spatial relationship between the active sites for the RNA dependent RNA polymerase and polyribonucleotidyltransferase responsible for capping of the viral RNA. We combine the present findings with our recently described electron microscopic structure of the VSV polymerase and propose a model of how the spatial arrangement of the capping activities of L may influence nucleotide polymerization.},
- author = {Tekes, Gergely and Rahmeh, Amal A and Whelan, Sean P J},
- doi = {10.1371/journal.ppat.1002073},
- file = {:Users/ryan/Documents/Mendeley Desktop/Tekes, Rahmeh, Whelan - 2011 - A freeze frame view of vesicular stomatitis virus transcription defines a minimal length of RNA for 5' pr.pdf:pdf},
- issn = {1553-7374},
- journal = {PLoS Pathogens},
- keywords = {5' Flanking Region,5' Flanking Region: genetics,Animals,Cells, Cultured,Guanine,Guanine: metabolism,Methylation,Nucleocapsid,Nucleocapsid: genetics,Organisms, Genetically Modified,RNA Caps,RNA Caps: genetics,RNA Caps: metabolism,RNA Replicase,RNA Replicase: genetics,RNA Replicase: metabolism,RNA, Messenger,RNA, Messenger: genetics,RNA, Messenger: metabolism,RNA, Viral,RNA, Viral: genetics,RNA, Viral: metabolism,Ribose,Ribose: metabolism,Spodoptera,Transcription, Genetic,Uridine Triphosphate,Uridine Triphosphate: metabolism,Vesicular Stomatitis,Vesicular Stomatitis: virology,Vesiculovirus,Vesiculovirus: genetics,Vesiculovirus: metabolism,Viral Nonstructural Proteins,Viral Nonstructural Proteins: genetics,Viral Nonstructural Proteins: metabolism,Viral Proteins,Viral Proteins: genetics,Viral Proteins: metabolism,Virus Replication,Virus Replication: genetics},
- month = jun,
- number = {6},
- pages = {e1002073},
- pmid = {21655110},
- title = {{A freeze frame view of vesicular stomatitis virus transcription defines a minimal length of RNA for 5' processing.}},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=3107219{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {7},
- year = {2011}
- }
- @article{Han2014,
- author = {Han, Pei and Li, Wei and Lin, Chiou-Hong and Yang, Jin and Shang, Ching and Nurnberg, Sylvia T. and Jin, Kevin Kai and Xu, Weihong and Lin, Chieh-Yu and Lin, Chien-jung and Xiong, Yiqin and Chien, Huan-chieh and Zhou, Bin and Ashley, Euan and Bernstein, Daniel and Chen, Peng-Sheng and Chen, Huei-sheng Vincent and Quertermous, Thomas and Chang, Ching-pin},
- doi = {10.1038/nature13596},
- file = {:Users/ryan/Documents/Mendeley Desktop/Han et al. - 2014 - A long noncoding RNA protects the heart from pathological hypertrophy.pdf:pdf},
- issn = {0028-0836},
- journal = {Nature},
- month = aug,
- number = {7520},
- pages = {102--106},
- publisher = {Nature Publishing Group},
- title = {{A long noncoding RNA protects the heart from pathological hypertrophy}},
- url = {http://dx.doi.org/10.1038/nature13596},
- volume = {514},
- year = {2014}
- }
- @article{Ritchie2015,
- abstract = {limma is an R/Bioconductor software package that provides an integrated solution for analysing data from gene expression experiments. It contains rich features for handling complex experimental designs and for information borrowing to overcome the problem of small sample sizes. Over the past decade, limma has been a popular choice for gene discovery through differential expression analyses of microarray and high-throughput PCR data. The package contains particularly strong facilities for reading, normalizing and exploring such data. Recently, the capabilities of limma have been significantly expanded in two important directions. First, the package can now perform both differential expression and differential splicing analyses of RNA sequencing (RNA-seq) data. All the downstream analysis tools previously restricted to microarray data are now available for RNA-seq as well. These capabilities allow users to analyse both RNA-seq and microarray data with very similar pipelines. Second, the package is now able to go past the traditional gene-wise expression analyses in a variety of ways, analysing expression profiles in terms of co-regulated sets of genes or in terms of higher-order expression signatures. This provides enhanced possibilities for biological interpretation of gene expression differences. This article reviews the philosophy and design of the limma package, summarizing both new and historical features, with an emphasis on recent enhancements and features that have not been previously described.},
- author = {Ritchie, Matthew E. and Phipson, Belinda and Wu, Di and Hu, Yifang and Law, Charity W. and Shi, Wei and Smyth, Gordon K.},
- doi = {10.1093/nar/gkv007},
- file = {:Users/ryan/Documents/Mendeley Desktop/Ritchie et al. - 2015 - limma powers differential expression analyses for RNA-sequencing and microarray studies.pdf:pdf;:Users/ryan/Documents/Mendeley Desktop/Ritchie et al. - 2015 - limma powers differential expression analyses for RNA-sequencing and microarray studies(3).pdf:pdf},
- isbn = {0305-1048},
- issn = {1362-4962},
- journal = {Nucleic Acids Research},
- number = {7},
- pages = {e47},
- pmid = {25605792},
- title = {{limma powers differential expression analyses for RNA-sequencing and microarray studies}},
- url = {http://nar.oxfordjournals.org/content/43/7/e47.full.pdf},
- volume = {43},
- year = {2015}
- }
- @article{Peng2012a,
- abstract = {MOTIVATION: Next-generation sequencing allows us to sequence reads from a microbial environment using single-cell sequencing or metagenomic sequencing technologies. However, both technologies suffer from the problem that sequencing depth of different regions of a genome or genomes from different species are highly uneven. Most existing genome assemblers usually have an assumption that sequencing depths are even. These assemblers fail to construct correct long contigs.
- RESULTS: We introduce the IDBA-UD algorithm that is based on the de Bruijn graph approach for assembling reads from single-cell sequencing or metagenomic sequencing technologies with uneven sequencing depths. Several non-trivial techniques have been employed to tackle the problems. Instead of using a simple threshold, we use multiple depth-relative thresholds to remove erroneous k-mers in both low-depth and high-depth regions. The technique of local assembly with paired-end information is used to solve the branch problem of low-depth short repeat regions. To speed up the process, an error correction step is conducted to correct reads of high-depth regions that can be aligned to high-confident contigs. Comparison of the performances of IDBA-UD and existing assemblers (Velvet, Velvet-SC, SOAPdenovo and Meta-IDBA) for different datasets, shows that IDBA-UD can reconstruct longer contigs with higher accuracy.
- AVAILABILITY: The IDBA-UD toolkit is available at our website http://www.cs.hku.hk/{\~{}}alse/idba{\_}ud},
- author = {Peng, Yu and Leung, Henry C M and Yiu, S M and Chin, Francis Y L},
- doi = {10.1093/bioinformatics/bts174},
- file = {:Users/ryan/Documents/Mendeley Desktop/Peng et al. - 2012 - IDBA-UD a de novo assembler for single-cell and metagenomic sequencing data with highly uneven depth.pdf:pdf},
- issn = {1367-4811},
- journal = {Bioinformatics},
- keywords = {Algorithms,Bacteria,Bacteria: genetics,Genome,High-Throughput Nucleotide Sequencing,Metagenomics,Metagenomics: methods,Sequence Analysis, DNA,Sequence Analysis, DNA: methods,Single-Cell Analysis,Single-Cell Analysis: methods},
- month = jun,
- number = {11},
- pages = {1420--1428},
- pmid = {22495754},
- title = {{IDBA-UD: a de novo assembler for single-cell and metagenomic sequencing data with highly uneven depth.}},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/22495754},
- volume = {28},
- year = {2012}
- }
- @article{Liu2015,
- abstract = {{\textcopyright} 2015 The Author(s). Variations in sample quality are frequently encountered in small RNA-sequencing experiments, and pose a major challenge in a differential expression analysis. Removal of high variation samples reduces noise, but at a cost of reducing power, thus limiting our ability to detect biologically meaningful changes. Similarly, retaining these samples in the analysis may not reveal any statistically significant changes due to the higher noise level. A compromise is to use all available data, but to down-weight the observations from more variable samples. We describe a statistical approach that facilitates this by modelling heterogeneity at both the sample and observational levels as part of the differential expression analysis. At the sample level this is achieved by fitting a log-linear variance model that includes common sample-specific or group-specific parameters that are shared between genes. The estimated sample variance factors are then converted to weights and combined with observational level weights obtained from the mean-variance relationship of the log-counts-per-million using 'voom'. A comprehensive analysis involving both simulations and experimental RNA-sequencing data demonstrates that this strategy leads to a universally more powerful analysis and fewer false discoveries when compared to conventional approaches. This methodology has wide application and is implemented in the open-source 'limma' package.},
- author = {Liu, Ruijie and Holik, Aliaksei Z. and Su, Shian and Jansz, Natasha and Chen, Kelan and Leong, Huei San and Blewitt, Marnie E. and Asselin-Labat, Marie-Liesse and Smyth, Gordon K. and Ritchie, Matthew E.},
- doi = {10.1093/nar/gkv412},
- file = {:Users/ryan/Documents/Mendeley Desktop/Liu et al. - 2015 - Why weight Modelling sample and observational level variability improves power in RNA-seq analyses.pdf:pdf},
- issn = {0305-1048},
- journal = {Nucleic Acids Research},
- month = sep,
- number = {15},
- pages = {e97},
- title = {{Why weight? Modelling sample and observational level variability improves power in RNA-seq analyses}},
- url = {https://academic.oup.com/nar/article-lookup/doi/10.1093/nar/gkv412},
- volume = {43},
- year = {2015}
- }
- @article{DeCaprio2007,
- author = {DeCaprio, D and Vinson, J P},
- doi = {10.1101/gr.6558107},
- file = {:Users/ryan/Documents/Mendeley Desktop/DeCaprio, Vinson - 2007 - Conrad gene prediction using conditional random fields.pdf:pdf},
- journal = {Genome Research},
- pages = {1389--1398},
- title = {{Conrad: gene prediction using conditional random fields}},
- url = {http://genome.cshlp.org/content/17/9/1389.short},
- year = {2007}
- }
- @article{Huber2002,
- author = {Huber, W. and von Heydebreck, A. and Sultmann, H. and Poustka, Annemarie and Vingron, Martin},
- doi = {10.1093/bioinformatics/18.suppl_1.S96},
- file = {:Users/ryan/Documents/Mendeley Desktop/Huber et al. - 2002 - Variance stabilization applied to microarray data calibration and to the quantification of differential expression.pdf:pdf},
- issn = {1367-4803},
- journal = {Bioinformatics},
- month = jul,
- number = {Suppl 1},
- pages = {S96--S104},
- title = {{Variance stabilization applied to microarray data calibration and to the quantification of differential expression}},
- url = {http://bioinformatics.oxfordjournals.org/cgi/doi/10.1093/bioinformatics/18.suppl{\_}1.S96},
- volume = {18},
- year = {2002}
- }
- @article{Yandell2012,
- abstract = {The falling cost of genome sequencing is having a marked impact on the research community with respect to which genomes are sequenced and how and where they are annotated. Genome annotation projects have generally become small-scale affairs that are often carried out by an individual laboratory. Although annotating a eukaryotic genome assembly is now within the reach of non-experts, it remains a challenging task. Here we provide an overview of the genome annotation process and the available tools and describe some best-practice approaches.},
- author = {Yandell, Mark and Ence, Daniel},
- doi = {10.1038/nrg3174},
- file = {:Users/ryan/Documents/Mendeley Desktop/Yandell, Ence - 2012 - A beginner's guide to eukaryotic genome annotation.pdf:pdf},
- issn = {1471-0064},
- journal = {Nature Reviews Genetics},
- keywords = {Base Sequence,Databases, Genetic,Eukaryota,Eukaryota: genetics,Exons,Genome,Genomics,Humans,Introns,Molecular Sequence Annotation,Molecular Sequence Annotation: methods,Molecular Sequence Annotation: standards,Molecular Sequence Data,Quality Control,RNA,RNA: genetics,Sequence Alignment,Software},
- month = may,
- number = {5},
- pages = {329--342},
- pmid = {22510764},
- publisher = {Nature Publishing Group},
- title = {{A beginner's guide to eukaryotic genome annotation.}},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/22510764},
- volume = {13},
- year = {2012}
- }
- @article{Hoang2011,
- abstract = {The advent of ChIP-seq technology has made the investigation of epigenetic regulatory networks a computationally tractable problem. Several groups have applied statistical computing methods to ChIP-seq datasets to gain insight into the epigenetic regulation of transcription. However, methods for estimating enrichment levels in ChIP-seq data for these computational studies are understudied and variable. Since the conclusions drawn from these data mining and machine learning applications strongly depend on the enrichment level inputs, a comparison of estimation methods with respect to the performance of statistical models should be made.},
- author = {Hoang, Stephen A and Xu, Xiaojiang and Bekiranov, Stefan},
- doi = {10.1186/1756-0500-4-288},
- file = {:Users/ryan/Documents/Mendeley Desktop/Hoang, Xu, Bekiranov - 2011 - Quantification of histone modification ChIP-seq enrichment for data mining and machine learning applicatio.pdf:pdf},
- issn = {1756-0500},
- journal = {BMC Research Notes},
- month = jan,
- number = {1},
- pages = {288},
- pmid = {21834981},
- publisher = {BioMed Central Ltd},
- title = {{Quantification of histone modification ChIP-seq enrichment for data mining and machine learning applications.}},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=3170335{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {4},
- year = {2011}
- }
- @article{Nix2008,
- author = {Nix, David A and Courdy, Samir J and Boucher, Kenneth M},
- doi = {10.1186/1471-2105-9-523},
- file = {:Users/ryan/Documents/Mendeley Desktop/Nix, Courdy, Boucher - 2008 - Empirical methods for controlling false positives and estimating confidence in ChIP-Seq peaks.pdf:pdf},
- issn = {1471-2105},
- journal = {BMC Bioinformatics},
- number = {1},
- pages = {523},
- title = {{Empirical methods for controlling false positives and estimating confidence in ChIP-Seq peaks}},
- url = {http://www.biomedcentral.com/1471-2105/9/523},
- volume = {9},
- year = {2008}
- }
- @article{Thomson2010,
- abstract = {CpG islands (CGIs) are prominent in the mammalian genome owing to their GC-rich base composition and high density of CpG dinucleotides. Most human gene promoters are embedded within CGIs that lack DNA methylation and coincide with sites of histone H3 lysine 4 trimethylation (H3K4me3), irrespective of transcriptional activity. In spite of these intriguing correlations, the functional significance of non-methylated CGI sequences with respect to chromatin structure and transcription is unknown. By performing a search for proteins that are common to all CGIs, here we show high enrichment for Cfp1, which selectively binds to non-methylated CpGs in vitro. Chromatin immunoprecipitation of a mono-allelically methylated CGI confirmed that Cfp1 specifically associates with non-methylated CpG sites in vivo. High throughput sequencing of Cfp1-bound chromatin identified a notable concordance with non-methylated CGIs and sites of H3K4me3 in the mouse brain. Levels of H3K4me3 at CGIs were markedly reduced in Cfp1-depleted cells, consistent with the finding that Cfp1 associates with the H3K4 methyltransferase Setd1 (refs 7, 8). To test whether non-methylated CpG-dense sequences are sufficient to establish domains of H3K4me3, we analysed artificial CpG clusters that were integrated into the mouse genome. Despite the absence of promoters, the insertions recruited Cfp1 and created new peaks of H3K4me3. The data indicate that a primary function of non-methylated CGIs is to genetically influence the local chromatin modification state by interaction with Cfp1 and perhaps other CpG-binding proteins.},
- author = {Thomson, John P and Skene, Peter J and Selfridge, Jim and Clouaire, Thomas and Guy, Jacky and Webb, Shaun and Kerr, Alastair R W and Deaton, Aim{\'{e}}e and Andrews, Rob and James, Keith D and Turner, Daniel J and Illingworth, Robert and Bird, Adrian},
- doi = {10.1038/nature08924},
- file = {:Users/ryan/Documents/Mendeley Desktop/Thomson et al. - 2010 - CpG islands influence chromatin structure via the CpG-binding protein Cfp1.pdf:pdf},
- issn = {1476-4687},
- journal = {Nature},
- keywords = {Alleles,Animals,Brain,Brain: cytology,Cell Line,Chromatin,Chromatin Assembly and Disassembly,Chromatin Immunoprecipitation,Chromatin: genetics,Chromatin: metabolism,CpG Islands,CpG Islands: genetics,DNA Methylation,Genome,Genome: genetics,Histone-Lysine N-Methyltransferase,Histone-Lysine N-Methyltransferase: metabolism,Histones,Histones: chemistry,Histones: metabolism,Methylation,Mice,NIH 3T3 Cells,Promoter Regions, Genetic,Trans-Activators,Trans-Activators: chemistry,Trans-Activators: deficiency,Trans-Activators: genetics,Trans-Activators: metabolism,Zinc Fingers},
- month = apr,
- number = {7291},
- pages = {1082--1086},
- pmid = {20393567},
- title = {{CpG islands influence chromatin structure via the CpG-binding protein Cfp1.}},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/20393567},
- volume = {464},
- year = {2010}
- }
- @article{Lun2014,
- abstract = {A common aim in ChIP-seq experiments is to identify changes in protein binding patterns between conditions, i.e. differential binding. A number of peak- and window-based strategies have been developed to detect differential binding when the regions of interest are not known in advance. However, careful consideration of error control is needed when applying these methods. Peak-based approaches use the same data set to define peaks and to detect differential binding. Done improperly, this can result in loss of type I error control. For window-based methods, controlling the false discovery rate over all detected windows does not guarantee control across all detected regions. Misinterpreting the former as the latter can result in unexpected liberalness. Here, several solutions are presented to maintain error control for these de novo counting strategies. For peak-based methods, peak calling should be performed on pooled libraries prior to the statistical analysis. For window-based methods, a hybrid approach using Simes' method is proposed to maintain control of the false discovery rate across regions. More generally, the relative advantages of peak- and window-based strategies are explored using a range of simulated and real data sets. Implementations of both strategies also compare favourably to existing programs for differential binding analyses.},
- author = {Lun, Aaron T L and Smyth, Gordon K},
- doi = {10.1093/nar/gku351},
- file = {:Users/ryan/Documents/Mendeley Desktop/Lun, Smyth - 2014 - De novo detection of differentially bound regions for ChIP-seq data using peaks and windows controlling error rat(2).pdf:pdf;:Users/ryan/Documents/Mendeley Desktop/Lun, Smyth - 2014 - De novo detection of differentially bound regions for ChIP-seq data using peaks and windows controlling error rates.pdf:pdf},
- issn = {1362-4962},
- journal = {Nucleic Acids Research},
- month = jan,
- number = {11},
- pages = {e95},
- pmid = {24852250},
- title = {{De novo detection of differentially bound regions for ChIP-seq data using peaks and windows: controlling error rates correctly.}},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=4066778{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {42},
- year = {2014}
- }
- @article{Auer2010,
- abstract = {Next-generation sequencing technologies are quickly becoming the preferred approach for characterizing and quantifying entire genomes. Even though data produced from these technologies are proving to be the most informative of any thus far, very little attention has been paid to fundamental design aspects of data collection and analysis, namely sampling, randomization, replication, and blocking. We discuss these concepts in an RNA sequencing framework. Using simulations we demonstrate the benefits of collecting replicated RNA sequencing data according to well known statistical designs that partition the sources of biological and technical variation. Examples of these designs and their corresponding models are presented with the goal of testing differential expression.},
- author = {Auer, Paul L and Doerge, R W},
- doi = {10.1534/genetics.110.114983},
- file = {:Users/ryan/Documents/Mendeley Desktop/Auer, Doerge - 2010 - Statistical design and analysis of RNA sequencing data.pdf:pdf},
- issn = {1943-2631},
- journal = {Genetics},
- keywords = {Base Sequence,Clinical Laboratory Techniques,Research,Research: methods},
- month = jun,
- number = {2},
- pages = {405--416},
- pmid = {20439781},
- title = {{Statistical design and analysis of RNA sequencing data.}},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=2881125{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {185},
- year = {2010}
- }
- @misc{Carlson2013,
- author = {Carlson, Marc and Obenchain, Valerie and Shannon, Paul and Tenenbaum, Dan and Bioconductor, R},
- file = {:Users/ryan/Documents/Mendeley Desktop/Carlson et al. - 2013 - Intermediate R Bioconductor for Sequence Analysis.pdf:pdf},
- title = {{Intermediate R / Bioconductor for Sequence Analysis}},
- year = {2013}
- }
- @article{Bresler2012,
- author = {Bresler, M. and Sheehan, S. and Chan, A. H. and Song, Y. S.},
- doi = {10.1093/bioinformatics/bts399},
- file = {:Users/ryan/Documents/Mendeley Desktop/Bresler et al. - 2012 - Telescoper de novo assembly of highly repetitive regions.pdf:pdf},
- issn = {1367-4803},
- journal = {Bioinformatics},
- month = sep,
- number = {18},
- pages = {i311--i317},
- title = {{Telescoper: de novo assembly of highly repetitive regions}},
- url = {http://bioinformatics.oxfordjournals.org/cgi/doi/10.1093/bioinformatics/bts399},
- volume = {28},
- year = {2012}
- }
- @article{Conesa2006,
- abstract = {MOTIVATION: Multi-series time-course microarray experiments are useful approaches for exploring biological processes. In this type of experiments, the researcher is frequently interested in studying gene expression changes along time and in evaluating trend differences between the various experimental groups. The large amount of data, multiplicity of experimental conditions and the dynamic nature of the experiments poses great challenges to data analysis. RESULTS: In this work, we propose a statistical procedure to identify genes that show different gene expression profiles across analytical groups in time-course experiments. The method is a two-regression step approach where the experimental groups are identified by dummy variables. The procedure first adjusts a global regression model with all the defined variables to identify differentially expressed genes, and in second a variable selection strategy is applied to study differences between groups and to find statistically significant different profiles. The methodology is illustrated on both a real and a simulated microarray dataset.},
- author = {Conesa, Ana and Nueda, Mar{\'{i}}a Jos{\'{e}} and Ferrer, Alberto and Tal{\'{o}}n, Manuel},
- doi = {10.1093/bioinformatics/btl056},
- file = {:Users/ryan/Documents/Mendeley Desktop/Conesa et al. - 2006 - maSigPro a method to identify significantly differential expression profiles in time-course microarray experiment.pdf:pdf},
- issn = {1367-4803},
- journal = {Bioinformatics},
- keywords = {Algorithms,Computer Simulation,Gene Expression,Gene Expression Profiling,Gene Expression Profiling: methods,Gene Expression: physiology,Models, Genetic,Models, Statistical,Oligonucleotide Array Sequence Analysis,Oligonucleotide Array Sequence Analysis: methods,Software,Time Factors},
- month = may,
- number = {9},
- pages = {1096--1102},
- pmid = {16481333},
- title = {{maSigPro: a method to identify significantly differential expression profiles in time-course microarray experiments.}},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/16481333},
- volume = {22},
- year = {2006}
- }
- @article{Zang2009,
- abstract = {MOTIVATION: Chromatin states are the key to gene regulation and cell identity. Chromatin immunoprecipitation (ChIP) coupled with high-throughput sequencing (ChIP-Seq) is increasingly being used to map epigenetic states across genomes of diverse species. Chromatin modification profiles are frequently noisy and diffuse, spanning regions ranging from several nucleosomes to large domains of multiple genes. Much of the early work on the identification of ChIP-enriched regions for ChIP-Seq data has focused on identifying localized regions, such as transcription factor binding sites. Bioinformatic tools to identify diffuse domains of ChIP-enriched regions have been lacking. RESULTS: Based on the biological observation that histone modifications tend to cluster to form domains, we present a method that identifies spatial clusters of signals unlikely to appear by chance. This method pools together enrichment information from neighboring nucleosomes to increase sensitivity and specificity. By using genomic-scale analysis, as well as the examination of loci with validated epigenetic states, we demonstrate that this method outperforms existing methods in the identification of ChIP-enriched signals for histone modification profiles. We demonstrate the application of this unbiased method in important issues in ChIP-Seq data analysis, such as data normalization for quantitative comparison of levels of epigenetic modifications across cell types and growth conditions. AVAILABILITY: http://home.gwu.edu/{\~{}}wpeng/Software.htm. SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.},
- author = {Zang, Chongzhi and Schones, Dustin E. and Zeng, Chen and Cui, Kairong and Zhao, Keji and Peng, Weiqun},
- doi = {10.1093/bioinformatics/btp340},
- file = {:Users/ryan/Documents/Mendeley Desktop/Zang et al. - 2009 - A clustering approach for identification of enriched domains from histone modification ChIP-Seq data.pdf:pdf},
- isbn = {1367-4811 (Electronic), 1367-4803 (Linking)},
- issn = {1367-4803},
- journal = {Bioinformatics},
- number = {15},
- pages = {1952--1958},
- pmid = {19505939},
- title = {{A clustering approach for identification of enriched domains from histone modification ChIP-Seq data}},
- url = {http://bioinformatics.oxfordjournals.org/content/25/15/1952.full.pdf},
- volume = {25},
- year = {2009}
- }
- @article{Stephens2016,
- abstract = {We introduce a new Empirical Bayes approach for large-scale hypothesis testing, including estimating False Discovery Rates (FDRs), and effect sizes. This approach has two key differences from existing approaches to FDR analysis. First, it assumes that the distribution of the actual (unobserved) effects is unimodal, with a mode at 0. This "unimodal assumption" (UA), although natural in many contexts, is not usually incorporated into standard FDR analysis, and we demonstrate how incorporating it brings many benefits. Specifically, the UA facilitates efficient and robust computation -- estimating the unimodal distribution involves solving a simple convex optimization problem -- and enables more accurate inferences provided that it holds. Second, the method takes as its input two numbers for each test (an effect size estimate, and corresponding standard error), rather than the one number usually used (p value, or z score). When available, using two numbers instead of one helps account for variation in measurement precision across tests. It also facilitates estimation of effects, and unlike standard FDR methods our approach provides interval estimates (credible regions) for each effect in addition to measures of significance. To provide a bridge between interval estimates and significance measures we introduce the term "local false sign rate" to refer to the probability of getting the sign of an effect wrong, and argue that it is a superior measure of significance than the local FDR because it is both more generally applicable, and can be more robustly estimated. Our methods are implemented in an R package ashr available from http://github.com/stephens999/ashr.},
- author = {Stephens, Matthew},
- doi = {10.1101/038216},
- file = {:Users/ryan/Documents/Mendeley Desktop/Stephens - 2016 - False Discovery Rates A New Deal.pdf:pdf;:Users/ryan/Documents/Mendeley Desktop/Stephens - 2016 - False Discovery Rates A New Deal(2).pdf:pdf},
- issn = {1465-4644},
- journal = {bioRxiv},
- keywords = {empirical bayes,false discovery rates,multiple testing,shrinkage,unimodal},
- pages = {1--12},
- pmid = {27756721},
- title = {{False Discovery Rates: A New Deal}},
- url = {http://biorxiv.org/lookup/doi/10.1101/038216 http://biorxiv.org/content/biorxiv/early/2016/01/29/038216.full.pdf},
- year = {2016}
- }
- @article{Champagne2014,
- abstract = {Land cover and land use classifications from remote sensing are increasingly becoming institutionalized framework data sets for monitoring environmental change. As such, the need for robust statements of classification accuracy is critical. This paper describes a method to estimate confidence in classification model accuracy using a bootstrap approach. Using this method, it was found that classification accuracy and confidence, while closely related, can be used in complementary ways to provide additional information on map accuracy and define groups of classes and to inform the future reference sampling strategies. Overall classification accuracy increases with an increase in the number of fields surveyed, where the width of classification confidence bounds decreases. Individual class accuracies and confidence were non-linearly related to the number of fields surveyed. Results indicate that some classes can be estimated accurately and confidently with fewer numbers of samples, whereas others require larger reference data sets to achieve satisfactory results. This approach is an improvement over other approaches for estimating class accuracy and confidence as it uses repetitive sampling to produce a more realistic estimate of the range in classification accuracy and confidence that can be obtained with different reference data inputs. {\textcopyright} 2014 Published by Elsevier B.V.},
- author = {Champagne, Catherine and McNairn, Heather and Daneshfar, Bahram and Shang, Jiali},
- doi = {10.1016/j.jag.2013.12.016},
- file = {:Users/ryan/Documents/Mendeley Desktop/Champagne et al. - 2014 - A bootstrap method for assessing classification accuracy and confidence for agricultural land use mapping in C.pdf:pdf},
- issn = {15698432},
- journal = {International Journal of Applied Earth Observation and Geoinformation},
- number = {1},
- pages = {44--52},
- publisher = {Elsevier B.V.},
- title = {{A bootstrap method for assessing classification accuracy and confidence for agricultural land use mapping in Canada}},
- url = {http://dx.doi.org/10.1016/j.jag.2013.12.016 http://ac.els-cdn.com/S0303243414000026/1-s2.0-S0303243414000026-main.pdf?{\_}tid=295d3a48-1964-11e6-b931-00000aab0f26{\&}acdnat=1463183046{\_}41602b3c63dcd4ee65da1b4454b41124},
- volume = {29},
- year = {2014}
- }
- @article{Roberts2013,
- author = {Roberts, Adam and Pachter, Lior},
- doi = {10.1038/NMETH.2251},
- file = {:Users/ryan/Documents/Mendeley Desktop/Roberts, Pachter - 2013 - Streaming fragment assignment for real-time analysis of sequencing experiments.pdf:pdf},
- journal = {Nature Methods},
- number = {1},
- pages = {71--73},
- title = {{Streaming fragment assignment for real-time analysis of sequencing experiments}},
- volume = {10},
- year = {2013}
- }
- @article{Piechota2016,
- abstract = {BACKGROUND: The regulation of gene expression in eukaryotic cells is a complex process that involves epigenetic modifications and the interaction of DNA with multiple transcription factors. This process can be studied with unprecedented sensitivity using a combination of chromatin immunoprecipitation and next-generation DNA sequencing (ChIP-seq). Available ChIP-seq data can be further utilized to interpret new gene expression profiling experiments. RESULTS: Here, we describe seqinspector, a tool that accepts any set of genomic coordinates from ChIP-seq or RNA-seq studies to identify shared transcriptional regulators. The presented web resource includes a large collection of publicly available ChIP-seq and RNA-seq experiments ({\textgreater}1300 tracks) performed on transcription factors, histone modifications, RNA polymerases, enhancers and insulators in humans and mice. Over-representation is calculated based on the coverage computed directly from indexed files storing ChIP-seq data (bigwig). Therefore, seqinspector is not limited to pre-computed sets of gene promoters. CONCLUSION: The tool can be used to identify common gene expression regulators for sets of co-expressed transcripts (including miRNAs, lncRNAs or any novel unannotated RNAs) or for sets of ChIP-seq peaks to identify putative protein-protein interactions or transcriptional co-factors. The tool is available at http://seqinspector.cremag.org.},
- author = {Piechota, Marcin and Korostynski, Michal and Ficek, Joanna and Tomski, Andrzej and Przewlocki, Ryszard},
- doi = {10.1186/s12859-016-0938-4},
- file = {:Users/ryan/Documents/Mendeley Desktop/Piechota et al. - 2016 - Seqinspector Position-based navigation through the ChIP-seq data landscape to identify gene expression regulato.pdf:pdf},
- issn = {14712105},
- journal = {BMC Bioinformatics},
- keywords = {ChIP-seq,Gene expression,Microarray,Promoter analysis,RNA-seq,Transcription factor},
- number = {1},
- pages = {1--7},
- pmid = {26868127},
- publisher = {BioMed Central},
- title = {{Seqinspector: Position-based navigation through the ChIP-seq data landscape to identify gene expression regulators}},
- url = {http://dx.doi.org/10.1186/s12859-016-0938-4 https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4751710/pdf/12859{\_}2016{\_}Article{\_}938.pdf},
- volume = {17},
- year = {2016}
- }
- @article{Demissie2008,
- abstract = {MOTIVATION: In searching for differentially expressed (DE) genes in microarray data, we often observe a fraction of the genes to have unequal variability between groups. This is not an issue in large samples, where a valid test exists that uses individual variances separately. The problem arises in the small-sample setting, where the approximately valid Welch test lacks sensitivity, while the more sensitive moderated t-test assumes equal variance.
- METHODS: We introduce a moderated Welch test (MWT) that allows unequal variance between groups. It is based on (i) weighting of pooled and unpooled standard errors and (ii) improved estimation of the gene-level variance that exploits the information from across the genes.
- RESULTS: When a non-trivial proportion of genes has unequal variability, false discovery rate (FDR) estimates based on the standard t and moderated t-tests are often too optimistic, while the standard Welch test has low sensitivity. The MWT is shown to (i) perform better than the standard t, the standard Welch and the moderated t-tests when the variances are unequal between groups and (ii) perform similarly to the moderated t, and better than the standard t and Welch tests when the group variances are equal. These results mean that MWT is more reliable than other existing tests over wider range of data conditions.
- AVAILABILITY: R package to perform MWT is available at http://www.meb.ki.se/{\~{}}yudpaw},
- author = {Demissie, Meaza and Mascialino, Barbara and Calza, Stefano and Pawitan, Yudi},
- doi = {10.1093/bioinformatics/btn100},
- file = {:Users/ryan/Documents/Mendeley Desktop/Demissie et al. - 2008 - Unequal group variances in microarray data analyses.pdf:pdf},
- issn = {1367-4811},
- journal = {Bioinformatics},
- keywords = {Algorithms,Analysis of Variance,Artifacts,Data Interpretation, Statistical,Gene Expression Profiling,Gene Expression Profiling: methods,Genetic Variation,Genetic Variation: genetics,Oligonucleotide Array Sequence Analysis,Oligonucleotide Array Sequence Analysis: methods,Reproducibility of Results,Sample Size,Sensitivity and Specificity},
- month = may,
- number = {9},
- pages = {1168--1174},
- pmid = {18344518},
- title = {{Unequal group variances in microarray data analyses}},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/18344518},
- volume = {24},
- year = {2008}
- }
- @article{Gevaert2015,
- abstract = {Summary: DNA methylation is an important mechanism regulating gene transcription, and its role in carcinogenesis has been extensively studied. Hyper and hypomethylation of genes is an alternative mechanism to deregulate gene expression in a wide range of diseases. At the same time, high-throughput DNA methylation assays have been developed generating vast amounts of genome wide DNA methylation measurements. Yet, few tools exist that can formally identify hypo and hypermethylated genes that are predictive of transcription and thus functionally relevant for a particular disease. To accommodate this lack of tools, we developed MethylMix, an algorithm implemented in R to identify disease specific hyper and hypomethylated genes. MethylMix is based on a beta mixture model to identify methylation states and compares them with the normal DNA methylation state. MethylMix introduces a novel metric, the `Differential Methylation value' or DM-value defined as the difference of a methylation state with the normal methylation state. Finally, matched gene expression data are used to identify, besides differential, transcriptionally predictive methylation states by focusing on methylation changes that effect gene expression. Availability and implementation: MethylMix was implemented as an R package and is available in bioconductor. Contact: olivier.gevaert@stanford.edu},
- author = {Gevaert, Olivier},
- doi = {10.1093/bioinformatics/btv020},
- file = {:Users/ryan/Documents/Mendeley Desktop/Gevaert - 2015 - MethylMix An R package for identifying DNA methylation-driven genes.pdf:pdf},
- issn = {1367-4803, 1460-2059},
- journal = {Bioinformatics},
- number = {11},
- pages = {1839--1841},
- pmid = {25609794},
- title = {{MethylMix: An R package for identifying DNA methylation-driven genes}},
- url = {http://bioinformatics.oxfordjournals.org/content/31/11/1839.full.pdf},
- volume = {31},
- year = {2015}
- }
- @article{Mutz2012,
- abstract = {Up to date research in biology, biotechnology, and medicine requires fast genome and transcriptome analysis technologies for the investigation of cellular state, physiology, and activity. Here, microarray technology and next generation sequencing of transcripts (RNA-Seq) are state of the art. Since microarray technology is limited towards the amount of RNA, the quantification of transcript levels and the sequence information, RNA-Seq provides nearly unlimited possibilities in modern bioanalysis. This chapter presents a detailed description of next-generation sequencing (NGS), describes the impact of this technology on transcriptome analysis and explains its possibilities to explore the modern RNA world.},
- author = {Mutz, Kai-Oliver and Heilkenbrinker, Alexandra and L{\"{o}}nne, Maren and Walter, Johanna-Gabriela and Stahl, Frank},
- doi = {10.1016/j.copbio.2012.09.004},
- journal = {Current Opinion in Biotechnology},
- month = feb,
- number = {1},
- pages = {22--30},
- title = {{Transcriptome analysis using next-generation sequencing}},
- url = {http://www.sciencedirect.com/science/article/pii/S0958166912001310},
- volume = {24},
- year = {2012}
- }
- @article{McCormick2010,
- abstract = {The analysis of climate data has relied heavily on hypothesis-driven statistical methods, while projections of future climate are based primarily on physics-based computational models. However, in recent years a wealth of new datasets has become available. Therefore, we take a more data-centric approach and propose a unified framework for studying climate, with an aim toward characterizing observed phenomena as well as discovering new knowledge in climate science. Specifically, we posit that complex networks are well suited for both descriptive analysis and predictive modeling tasks. We show that the structural properties of 'climate networks' have useful interpretation within the domain. Further, we extract clusters from these networks and demonstrate their predictive power as climate indices. Our experimental results establish that the network clusters are statistically significantly better predictors than clusters derived using a more traditional clustering approach. Using complex networks as data representation thus enables the unique opportunity for descriptive and predictive modeling to inform each other. 2010 Wiley Periodicals, Inc.},
- archivePrefix = {arXiv},
- arxivId = {1206.3552},
- author = {McCormick, Tyler H. and Ferrell, Rebecca and Karr, Alan F. and Ryan, Patrick B.},
- doi = {10.1002/sam},
- eprint = {1206.3552},
- file = {:Users/ryan/Documents/Mendeley Desktop/McCormick et al. - 2010 - Complex Networks as a Unified Framework for Descriptive Analysis and Predictive Modeling in Climate Science.pdf:pdf},
- isbn = {1932-1872},
- issn = {19321872},
- journal = {Science And Technology},
- keywords = {climate data,community detection,complex networks,multivariate predictive modeling,network analysis},
- number = {5},
- pages = {497--511},
- pmid = {21824845},
- title = {{Complex Networks as a Unified Framework for Descriptive Analysis and Predictive Modeling in Climate Science}},
- url = {http://onlinelibrary.wiley.com/store/10.1002/sam.11271/asset/sam11271.pdf?v=1{\&}t=iwmptjph{\&}s=ca945af6209386af9ff6ae0afabc917b905fd717},
- volume = {4},
- year = {2010}
- }
- @article{Eksi2013,
- author = {Eksi, Ridvan and Li, Hong-Dong and Menon, Rajasree and Wen, Yuchen and Omenn, Gilbert S. and Kretzler, Matthias and Guan, Yuanfang},
- doi = {10.1371/journal.pcbi.1003314},
- editor = {Iakoucheva, Lilia M.},
- file = {:Users/ryan/Documents/Mendeley Desktop/Eksi et al. - 2013 - Systematically Differentiating Functions for Alternatively Spliced Isoforms through Integrating RNA-seq Data.pdf:pdf},
- issn = {1553-7358},
- journal = {PLoS Computational Biology},
- month = nov,
- number = {11},
- pages = {e1003314},
- title = {{Systematically Differentiating Functions for Alternatively Spliced Isoforms through Integrating RNA-seq Data}},
- url = {http://dx.plos.org/10.1371/journal.pcbi.1003314},
- volume = {9},
- year = {2013}
- }
- @article{Lu2005,
- abstract = {In testing for differential gene expression involving multiple serial analysis of gene expression (SAGE) libraries, it is critical to account for both between and within library variation. Several methods have been proposed, including the t test, tw test, and an overdispersed logistic regression approach. The merits of these tests, however, have not been fully evaluated. Questions still remain on whether further improvements can be made.},
- author = {Lu, Jun and Tomfohr, John K and Kepler, Thomas B},
- doi = {10.1186/1471-2105-6-165},
- file = {:Users/ryan/Documents/Mendeley Desktop/Lu, Tomfohr, Kepler - 2005 - Identifying differential expression in multiple SAGE libraries an overdispersed log-linear model approach.pdf:pdf},
- isbn = {1471210561},
- issn = {1471-2105},
- journal = {BMC Bioinformatics},
- keywords = {Carcinoma, Pancreatic Ductal,Carcinoma, Pancreatic Ductal: genetics,Cell Line, Tumor,Gene Expression Profiling,Gene Expression Profiling: methods,Gene Expression Profiling: statistics {\&} numerical,Gene Library,Humans,Internet,Linear Models,Pancreatic Neoplasms,Pancreatic Neoplasms: genetics,RNA, Messenger,RNA, Messenger: analysis,ROC Curve,User-Computer Interface},
- month = jun,
- pages = {165},
- pmid = {15987513},
- title = {{Identifying differential expression in multiple SAGE libraries: an overdispersed log-linear model approach}},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=1189357{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {6},
- year = {2005}
- }
- @article{Trapnell2012,
- author = {Trapnell, Cole and Hendrickson, David G and Sauvageau, Martin and Goff, Loyal and Rinn, John L and Pachter, Lior},
- doi = {10.1038/nbt.2450},
- file = {:Users/ryan/Documents/Mendeley Desktop/Trapnell et al. - 2012 - Differential analysis of gene regulation at transcript resolution with RNA-seq.pdf:pdf},
- issn = {1087-0156},
- journal = {Nature Biotechnology},
- month = jan,
- number = {1},
- pages = {46--53},
- publisher = {Nature Publishing Group},
- title = {{Differential analysis of gene regulation at transcript resolution with RNA-seq}},
- url = {http://www.nature.com/doifinder/10.1038/nbt.2450},
- volume = {31},
- year = {2013}
- }
- @article{Kasowski2010,
- author = {Kasowski, Maya and Grubert, Fabian and Heffelfinger, Christopher and others},
- file = {:Users/ryan/Documents/Mendeley Desktop/Kasowski, Grubert, Heffelfinger - 2010 - Variation in transcription factor binding among humans.pdf:pdf},
- journal = {Science},
- number = {5975},
- pages = {232--235},
- title = {{Variation in transcription factor binding among humans}},
- url = {http://www.sciencemag.org/content/328/5975/232.short},
- volume = {328},
- year = {2010}
- }
- @article{Li2012a,
- abstract = {MOTIVATION: RNA-Seq uses the high-throughput sequencing technology to identify and quantify transcriptome at an unprecedented high resolution and low cost. However, RNA-Seq reads are usually not uniformly distributed and biases in RNA-Seq data post great challenges in many applications including transcriptome assembly and the expression level estimation of genes or isoforms. Much effort has been made in the literature to calibrate the expression level estimation from biased RNA-Seq data, but the effect of biases on transcriptome assembly remains largely unexplored. RESULTS: Here, we propose a statistical framework for both transcriptome assembly and isoform expression level estimation from biased RNA-Seq data. Using a quasi-multinomial distribution model, our method is able to capture various types of RNA-Seq biases, including positional, sequencing and mappability biases. Our experimental results on simulated and real RNA-Seq datasets exhibit interesting effects of RNA-Seq biases on both transcriptome assembly and isoform expression level estimation. The advantage of our method is clearly shown in the experimental analysis by its high sensitivity and precision in transcriptome assembly and the high concordance of its estimated expression levels with qRT-PCR data. AVAILABILITY: CEM is freely available at http://www.cs.ucr.edu/{\~{}}liw/cem.html CONTACT: liw@cs.ucr.edu.},
- author = {Li, Wei and Jiang, Tao},
- doi = {10.1093/bioinformatics/bts559},
- file = {:Users/ryan/Documents/Mendeley Desktop/Li, Jiang - 2012 - Transcriptome Assembly and Isoform Expression Level Estimation from Biased RNA-Seq Reads.pdf:pdf},
- issn = {1367-4811},
- journal = {Bioinformatics},
- month = oct,
- number = {22},
- pages = {2914--2921},
- pmid = {23060617},
- title = {{Transcriptome Assembly and Isoform Expression Level Estimation from Biased RNA-Seq Reads}},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/23060617},
- volume = {28},
- year = {2012}
- }
- @article{Hart2013,
- abstract = {BACKGROUND: Early application of second-generation sequencing technologies to transcript quantitation (RNA-seq) has hinted at a vast mammalian transcriptome, including transcripts from nearly all known genes, which might be fully measured only by ultradeep sequencing. Subsequent studies suggested that low-abundance transcripts might be the result of technical or biological noise rather than active transcripts; moreover, most RNA-seq experiments did not provide enough read depth to generate high-confidence estimates of gene expression for low-abundance transcripts. As a result, the community adopted several heuristics for RNA-seq analysis, most notably an arbitrary expression threshold of 0.3 - 1 FPKM for downstream analysis. However, advances in RNA-seq library preparation, sequencing technology, and informatic analysis have addressed many of the systemic sources of uncertainty and undermined the assumptions that drove the adoption of these heuristics. We provide an updated view of the accuracy and efficiency of RNA-seq experiments, using genomic data from large-scale studies like the ENCODE project to provide orthogonal information against which to validate our conclusions.
- RESULTS: We show that a human cell's transcriptome can be divided into active genes carrying out the work of the cell and other genes that are likely the by-products of biological or experimental noise. We use ENCODE data on chromatin state to show that ultralow-expression genes are predominantly associated with repressed chromatin; we provide a novel normalization metric, zFPKM, that identifies the threshold between active and background gene expression; and we show that this threshold is robust to experimental and analytical variations.
- CONCLUSIONS: The zFPKM normalization method accurately separates the biologically relevant genes in a cell, which are associated with active promoters, from the ultralow-expression noisy genes that have repressed promoters. A read depth of twenty to thirty million mapped reads allows high-confidence quantitation of genes expressed at this threshold, providing important guidance for the design of RNA-seq studies of gene expression. Moreover, we offer an example for using extensive ENCODE chromatin state information to validate RNA-seq analysis pipelines.},
- author = {Hart, Traver and Komori, H Kiyomi and LaMere, Sarah and Podshivalova, Katie and Salomon, Daniel R},
- doi = {10.1186/1471-2164-14-778},
- file = {:Users/ryan/Documents/Mendeley Desktop/Hart et al. - 2013 - Finding the active genes in deep RNA-seq gene expression studies.pdf:pdf},
- issn = {1471-2164},
- journal = {BMC Genomics},
- month = nov,
- pages = {778},
- pmid = {24215113},
- title = {{Finding the active genes in deep RNA-seq gene expression studies}},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=3870982{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {14},
- year = {2013}
- }
- @article{Turner2017,
- author = {Turner, Isaac and Garimella, Kiran V},
- doi = {10.1101/147777},
- file = {:Users/ryan/Documents/Mendeley Desktop/Turner, Garimella - 2017 - Integrating long-range connectivity information into de Bruijn graphs.pdf:pdf},
- journal = {bioRxiv},
- title = {{Integrating long-range connectivity information into de Bruijn graphs}},
- url = {http://www.biorxiv.org/content/biorxiv/early/2017/06/09/147777.full.pdf},
- year = {2017}
- }
- @article{Tanner2007,
- abstract = {Annotation of protein-coding genes is a key goal of genome sequencing projects. In spite of tremendous recent advances in computational gene finding, comprehensive annotation remains a challenge. Peptide mass spectrometry is a powerful tool for researching the dynamic proteome and suggests an attractive approach to discover and validate protein-coding genes. We present algorithms to construct and efficiently search spectra against a genomic database, with no prior knowledge of encoded proteins. By searching a corpus of 18.5 million tandem mass spectra (MS/MS) from human proteomic samples, we validate 39,000 exons and 11,000 introns at the level of translation. We present translation-level evidence for novel or extended exons in 16 genes, confirm translation of 224 hypothetical proteins, and discover or confirm over 40 alternative splicing events. Polymorphisms are efficiently encoded in our database, allowing us to observe variant alleles for 308 coding SNPs. Finally, we demonstrate the use of mass spectrometry to improve automated gene prediction, adding 800 correct exons to our predictions using a simple rescoring strategy. Our results demonstrate that proteomic profiling should play a role in any genome sequencing project.},
- author = {Tanner, Stephen and Shen, Zhouxin and Ng, Julio and Florea, Liliana and Guig{\'{o}}, Roderic and Briggs, Steven P and Bafna, Vineet},
- doi = {10.1101/gr.5646507},
- file = {:Users/ryan/Documents/Mendeley Desktop/Tanner et al. - 2007 - Improving gene annotation using peptide mass spectrometry.pdf:pdf},
- issn = {1088-9051},
- journal = {Genome Research},
- keywords = {Algorithms,Alternative Splicing,Amino Acid Sequence,Databases, Protein,Exons,Genetic Techniques,Humans,Introns,Mass Spectrometry,Mass Spectrometry: methods,Molecular Sequence Data,Peptides,Peptides: chemistry,Peptides: genetics,Polymorphism, Single Nucleotide,Protein Array Analysis,Protein Array Analysis: methods,Protein Array Analysis: statistics {\&} numerical dat,Proteomics,Proteomics: methods,Proteomics: statistics {\&} numerical data,Sequence Alignment},
- month = feb,
- number = {2},
- pages = {231--239},
- pmid = {17189379},
- title = {{Improving gene annotation using peptide mass spectrometry}},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=1781355{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {17},
- year = {2007}
- }
- @article{Liu2013,
- author = {Liu, Yun and Aryee, Martin J and Padyukov, Leonid and Fallin, M Daniele and Hesselberg, Espen and Runarsson, Arni and Reinius, Lovisa and Acevedo, Nathalie and Taub, Margaret and Ronninger, Marcus and Shchetynsky, Klementy and Scheynius, Annika and Kere, Juha and Alfredsson, Lars and Klareskog, Lars and Ekstr{\"{o}}m, Tomas J and Feinberg, Andrew P},
- doi = {10.1038/nbt.2487},
- file = {:Users/ryan/Documents/Mendeley Desktop/Liu et al. - 2013 - Epigenome-wide association data implicate DNA methylation as an intermediary of genetic risk in rheumatoid arthri(2).pdf:pdf},
- issn = {1087-0156},
- journal = {Nature Biotechnology},
- month = feb,
- number = {2},
- pages = {142--147},
- title = {{Epigenome-wide association data implicate DNA methylation as an intermediary of genetic risk in rheumatoid arthritis}},
- url = {http://www.nature.com/doifinder/10.1038/nbt.2487},
- volume = {31},
- year = {2013}
- }
- @article{Gibbs2007,
- abstract = {The rhesus macaque (Macaca mulatta) is an abundant primate species that diverged from the ancestors of Homo sapiens about 25 million years ago. Because they are genetically and physiologically similar to humans, rhesus monkeys are the most widely used nonhuman primate in basic and applied biomedical research. We determined the genome sequence of an Indian-origin Macaca mulatta female and compared the data with chimpanzees and humans to reveal the structure of ancestral primate genomes and to identify evidence for positive selection and lineage-specific expansions and contractions of gene families. A comparison of sequences from individual animals was used to investigate their underlying genetic diversity. The complete description of the macaque genome blueprint enhances the utility of this animal model for biomedical research and improves our understanding of the basic biology of the species.},
- annote = {From Duplicate 2 (
- Evolutionary and biomedical insights from the rhesus macaque genome.
- - Gibbs, Richard a; Rogers, Jeffrey; Katze, Michael G; Bumgarner, Roger; Weinstock, George M; Mardis, Elaine R; Remington, Karin a; Strausberg, Robert L; Venter, J Craig; Wilson, Richard K; Batzer, Mark a; Bustamante, Carlos D; Eichler, Evan E; Hahn, Matthew W; Hardison, Ross C; Makova, Kateryna D; Miller, Webb; Milosavljevic, Aleksandar; Palermo, Robert E; Siepel, Adam; Sikela, James M; Attaway, Tony; Bell, Stephanie; Bernard, Kelly E; Buhay, Christian J; Chandrabose, Mimi N; Dao, Marvin; Davis, Clay; Delehaunty, Kimberly D; Ding, Yan; Dinh, Huyen H; Dugan-Rocha, Shannon; Fulton, Lucinda a; Gabisi, Ramatu Ayiesha; Garner, Toni T; Godfrey, Jennifer; Hawes, Alicia C; Hernandez, Judith; Hines, Sandra; Holder, Michael; Hume, Jennifer; Jhangiani, Shalini N; Joshi, Vandita; Khan, Ziad Mohid; Kirkness, Ewen F; Cree, Andrew; Fowler, R Gerald; Lee, Sandra; Lewis, Lora R; Li, Zhangwan; Liu, Yih-Shin; Moore, Stephanie M; Muzny, Donna; Nazareth, Lynne V; Ngo, Dinh Ngoc; Okwuonu, Geoffrey O; Pai, Grace; Parker, David; Paul, Heidie a; Pfannkoch, Cynthia; Pohl, Craig S; Rogers, Yu-Hui; Ruiz, San Juana; Sabo, Aniko; Santibanez, Jireh; Schneider, Brian W; Smith, Scott M; Sodergren, Erica; Svatek, Amanda F; Utterback, Teresa R; Vattathil, Selina; Warren, Wesley; White, Courtney Sherell; Chinwalla, Asif T; Feng, Yucheng; Halpern, Aaron L; Hillier, Ladeana W; Huang, Xiaoqiu; Minx, Pat; Nelson, Joanne O; Pepin, Kymberlie H; Qin, Xiang; Sutton, Granger G; Venter, Eli; Walenz, Brian P; Wallis, John W; Worley, Kim C; Yang, Shiaw-Pyng; Jones, Steven M; Marra, Marco a; Rocchi, Mariano; Schein, Jacqueline E; Baertsch, Robert; Clarke, Laura; Cs{\"{u}}r{\"{o}}s, Mikl{\'{o}}s; Glasscock, Jarret; Harris, R Alan; Havlak, Paul; Jackson, Andrew R; Jiang, Huaiyang; Liu, Yue; Messina, David N; Shen, Yufeng; Song, Henry Xing-Zhi; Wylie, Todd; Zhang, Lan; Birney, Ewan; Han, Kyudong; Konkel, Miriam K; Lee, Jungnam; Smit, Arian F a; Ullmer, Brygg; Wang, Hui; Xing, Jinchuan; Burhans, Richard; Cheng, 
Ze; Karro, John E; Ma, Jian; Raney, Brian; She, Xinwei; Cox, Michael J; Demuth, Jeffery P; Dumas, Laura J; Han, Sang-Gook; Hopkins, Janet; Karimpour-Fard, Anis; Kim, Young H; Pollack, Jonathan R; Vinar, Tomas; Addo-Quaye, Charles; Degenhardt, Jeremiah; Denby, Alexandra; Hubisz, Melissa J; Indap, Amit; Kosiol, Carolin; Lahn, Bruce T; Lawson, Heather a; Marklein, Alison; Nielsen, Rasmus; Vallender, Eric J; Clark, Andrew G; Ferguson, Betsy; Hernandez, Ryan D; Hirani, Kashif; Kehrer-Sawatzki, Hildegard; Kolb, Jessica; Patil, Shobha; Pu, Ling-Ling; Ren, Yanru; Smith, David Glenn; Wheeler, David a; Schenck, Ian; Ball, Edward V; Chen, Rui; Cooper, David N; Giardine, Belinda; Hsu, Fan; Kent, W James; Lesk, Arthur; Nelson, David L; O'brien, William E; Pr{\"{u}}fer, Kay; Stenson, Peter D; Wallace, James C; Ke, Hui; Liu, Xiao-Ming; Wang, Peng; Xiang, Andy Peng; Yang, Fan; Barber, Galt P; Haussler, David; Karolchik, Donna; Kern, Andy D; Kuhn, Robert M; Smith, Kayla E; Zwieg, Ann S )
- And Duplicate 3 (
- Evolutionary and biomedical insights from the rhesus macaque genome.
- - Gibbs, Richard a; Rogers, Jeffrey; Katze, Michael G; Bumgarner, Roger; Weinstock, George M; Mardis, Elaine R; Remington, Karin a; Strausberg, Robert L; Venter, J Craig; Wilson, Richard K; Batzer, Mark a; Bustamante, Carlos D; Eichler, Evan E; Hahn, Matthew W; Hardison, Ross C; Makova, Kateryna D; Miller, Webb; Milosavljevic, Aleksandar; Palermo, Robert E; Siepel, Adam; Sikela, James M; Attaway, Tony; Bell, Stephanie; Bernard, Kelly E; Buhay, Christian J; Chandrabose, Mimi N; Dao, Marvin; Davis, Clay; Delehaunty, Kimberly D; Ding, Yan; Dinh, Huyen H; Dugan-Rocha, Shannon; Fulton, Lucinda a; Gabisi, Ramatu Ayiesha; Garner, Toni T; Godfrey, Jennifer; Hawes, Alicia C; Hernandez, Judith; Hines, Sandra; Holder, Michael; Hume, Jennifer; Jhangiani, Shalini N; Joshi, Vandita; Khan, Ziad Mohid; Kirkness, Ewen F; Cree, Andrew; Fowler, R Gerald; Lee, Sandra; Lewis, Lora R; Li, Zhangwan; Liu, Yih-Shin Yue; Moore, Stephanie M; Muzny, Donna; Nazareth, Lynne V; Ngo, Dinh Ngoc; Okwuonu, Geoffrey O; Pai, Grace; Parker, David; Paul, Heidie a; Pfannkoch, Cynthia; Pohl, Craig S; Rogers, Yu-Hui; Ruiz, San Juana; Sabo, Aniko; Santibanez, Jireh; Schneider, Brian W; Smith, Scott M; Sodergren, Erica; Svatek, Amanda F; Utterback, Teresa R; Vattathil, Selina; Warren, Wesley; White, Courtney Sherell; Chinwalla, Asif T; Feng, Yucheng; Halpern, Aaron L; Hillier, Ladeana W; Huang, Xiaoqiu; Minx, Pat; Nelson, Joanne O; Pepin, Kymberlie H; Qin, Xiang; Sutton, Granger G; Venter, Eli; Walenz, Brian P; Wallis, John W; Worley, Kim C; Yang, Shiaw-Pyng; Jones, Steven M; Marra, Marco a; Rocchi, Mariano; Schein, Jacqueline E; Baertsch, Robert; Clarke, Laura; Cs{\"{u}}r{\"{o}}s, Mikl{\'{o}}s; Glasscock, Jarret; Harris, R Alan; Havlak, Paul; Jackson, Andrew R; Jiang, Huaiyang; Messina, David N; Shen, Yufeng; Song, Henry Xing-Zhi; Wylie, Todd; Zhang, Lan; Birney, Ewan; Han, Kyudong; Konkel, Miriam K; Lee, Jungnam; Smit, Arian F a; Ullmer, Brygg; Wang, Hui; Xing, Jinchuan; Burhans, Richard; Cheng, Ze; 
Karro, John E; Ma, Jian; Raney, Brian; She, Xinwei; Cox, Michael J; Demuth, Jeffery P; Dumas, Laura J; Han, Sang-Gook; Hopkins, Janet; Karimpour-Fard, Anis; Kim, Young H; Pollack, Jonathan R; Vinar, Tomas; Addo-Quaye, Charles; Degenhardt, Jeremiah; Denby, Alexandra; Hubisz, Melissa J; Indap, Amit; Kosiol, Carolin; Lahn, Bruce T; Lawson, Heather a; Marklein, Alison; Nielsen, Rasmus; Vallender, Eric J; Clark, Andrew G; Ferguson, Betsy; Hernandez, Ryan D; Hirani, Kashif; Kehrer-Sawatzki, Hildegard; Kolb, Jessica; Patil, Shobha; Pu, Ling-Ling; Ren, Yanru; Smith, David Glenn; Wheeler, David a; Schenck, Ian; Ball, Edward V; Chen, Rui; Cooper, David N; Giardine, Belinda; Hsu, Fan; Kent, W James; Lesk, Arthur; Nelson, David L; O'brien, William E; Pr{\"{u}}fer, Kay; Stenson, Peter D; Wallace, James C; Ke, Hui; Liu, Xiao-Ming; Wang, Peng; Xiang, Andy Peng; Yang, Fan; Barber, Galt P; Haussler, David; Karolchik, Donna; Kern, Andy D; Kuhn, Robert M; Smith, Kayla E; Zwieg, Ann S )
- },
- author = {Gibbs, Richard a and Rogers, J. and Katze, Michael G and Bumgarner, Roger and Weinstock, George M and Mardis, Elaine R and Remington, Karin a and Strausberg, Robert L and Venter, J. C. and Wilson, Richard K and Batzer, Mark a and Bustamante, Carlos D and Eichler, Evan E and Hahn, Matthew W and Hardison, Ross C and Makova, Kateryna D and Miller, Webb and Milosavljevic, Aleksandar and Palermo, Robert E and Siepel, Adam and Sikela, James M and Attaway, Tony and Bell, Stephanie and Bernard, Kelly E and Buhay, Christian J and Chandrabose, Mimi N and Dao, Marvin and Davis, Clay and Delehaunty, Kimberly D and Ding, Yan and Dinh, Huyen H and Dugan-Rocha, Shannon and Fulton, Lucinda a and Gabisi, Ramatu Ayiesha and Garner, Toni T and Godfrey, Jennifer and Hawes, Alicia C and Hernandez, J. and Hines, Sandra and Holder, Michael and Hume, Jennifer and Jhangiani, Shalini N and Joshi, Vandita and Khan, Ziad Mohid and Kirkness, Ewen F and Cree, Andrew and Fowler, R Gerald and Lee, S. and Lewis, Lora R and Li, Zhangwan and Liu, Y.-s. and Moore, Stephanie M and Muzny, Donna and Nazareth, Lynne V and Ngo, Dinh Ngoc and Okwuonu, Geoffrey O and Pai, Grace and Parker, David and Paul, Heidie a and Pfannkoch, Cynthia and Pohl, Craig S and Rogers, Y.-H. and Ruiz, San Juana and Sabo, Aniko and Santibanez, Jireh and Schneider, Brian W and Smith, S. M. and Sodergren, Erica and Svatek, Amanda F and Utterback, Teresa R and Vattathil, Selina and Warren, Wesley and White, Courtney Sherell and Chinwalla, Asif T and Feng, Yucheng and Halpern, Aaron L and Hillier, Ladeana W and Huang, Xiaoqiu and Minx, Pat and Nelson, J. O. and Pepin, Kymberlie H and Qin, Xiang and Sutton, Granger G and Venter, Eli and Walenz, Brian P and Wallis, John W and Worley, Kim C and Yang, S.-P. and Jones, Steven M and Marra, Marco a and Rocchi, Mariano and Schein, Jacqueline E and Baertsch, Robert and Clarke, Laura and Csuros, M. 
and Glasscock, Jarret and Harris, R Alan and Havlak, Paul and Jackson, Andrew R and Jiang, Huaiyang and Liu, Y. and Messina, David N and Shen, Yufeng and Song, Henry X.-Z. and Wylie, Todd and Zhang, Lan and Birney, Ewan and Han, K. and Konkel, Miriam K and Lee, Jungnam and Smit, Arian F a and Ullmer, Brygg and Wang, H. and Xing, Jinchuan and Burhans, Richard and Cheng, Ze and Karro, John E and Ma, Jian and Raney, Brian and She, Xinwei and Cox, Michael J and Demuth, Jeffery P and Dumas, Laura J and Han, S.-G. and Hopkins, Janet and Karimpour-Fard, Anis and Kim, Young H and Pollack, Jonathan R and Vinar, Tomas and Addo-Quaye, Charles and Degenhardt, Jeremiah and Denby, Alexandra and Hubisz, Melissa J and Indap, Amit and Kosiol, Carolin and Lahn, Bruce T and Lawson, Heather a and Marklein, Alison and Nielsen, Rasmus and Vallender, Eric J and Clark, Andrew G and Ferguson, Betsy and Hernandez, Ryan D and Hirani, Kashif and Kehrer-Sawatzki, Hildegard and Kolb, Jessica and Patil, Shobha and Pu, L.-L. and Ren, Yanru and Smith, D. G. and Wheeler, David a and Schenck, Ian and Ball, Edward V and Chen, Rui and Cooper, David N and Giardine, Belinda and Hsu, Fan and Kent, W James and Lesk, Arthur and Nelson, David L and O'Brien, W. E. and Prufer, K. and Stenson, Peter D and Wallace, James C and Ke, Hui and Liu, X.-M. and Wang, Peng and Xiang, Andy Peng and Yang, Fan and Barber, Galt P and Haussler, David and Karolchik, Donna and Kern, Andy D and Kuhn, Robert M and Smith, Kayla E and Zwieg, Ann S},
- doi = {10.1126/science.1139247},
- file = {:Users/ryan/Documents/Mendeley Desktop/Gibbs et al. - 2007 - Evolutionary and Biomedical Insights from the Rhesus Macaque Genome.pdf:pdf;:Users/ryan/Documents/Mendeley Desktop/Gibbs et al. - 2007 - Evolutionary and Biomedical Insights from the Rhesus Macaque Genome.pdf:pdf;:Users/ryan/Documents/Mendeley Desktop/Gibbs et al. - 2007 - Evolutionary and Biomedical Insights from the Rhesus Macaque Genome(3).pdf:pdf},
- isbn = {1377298973},
- issn = {0036-8075},
- journal = {Science},
- keywords = {Animals,Biomedical Research,DNA,Evolution,Female,Gene Duplication,Gene Rearrangement,Genetic Diseases,Genetic Variation,Genome,Humans,Inborn,Macaca mulatta,Macaca mulatta: genetics,Male,Molecular,Multigene Family,Mutation,Pan troglodytes,Pan troglodytes: genetics,Sequence Analysis,Species Specificity},
- month = {apr},
- number = {5822},
- pages = {222--234},
- pmid = {17431167},
- title = {{Evolutionary and Biomedical Insights from the Rhesus Macaque Genome}},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/17431167 http://www.sciencemag.org/cgi/doi/10.1126/science.1139247},
- volume = {316},
- year = {2007}
- }
- @article{NgocTamTran2014,
- author = {Tran, Ngoc Tam and Huang, Chun-Hsi},
- doi = {10.4137/GRSB.S13612},
- file = {:Users/ryan/Documents/Mendeley Desktop/Ngoc Tam Tran, Huang - 2014 - Gene Expression and Gene Ontology Enrichment Analysis for H3K4me3 and H3K4me1 in Mouse Liver and Mouse Emb.pdf:pdf},
- issn = {1177-6250},
- journal = {Gene Regulation and Systems Biology},
- keywords = {chip-seq,embryonic stem cell,gene expression,gene ontology,h3k4me1,h3k4me3,rna-seq},
- pages = {33--43},
- title = {{Gene Expression and Gene Ontology Enrichment Analysis for H3K4me3 and H3K4me1 in Mouse Liver and Mouse Embryonic Stem Cell Using ChIP-Seq and RNA-Seq}},
- url = {http://www.la-press.com/gene-expression-and-gene-ontology-enrichment-analysis-for-h3k4me3-and--article-a4039},
- volume = {8},
- year = {2014}
- }
- @article{Ebeling2011b,
- abstract = {The long-tailed macaque, also referred to as cynomolgus monkey (Macaca fascicularis), is one of the most important nonhuman primate animal models in basic and applied biomedical research. To improve the predictive power of primate experiments for humans, we determined the genome sequence of a Macaca fascicularis female of Mauritian origin using a whole-genome shotgun sequencing approach. We applied a template switch strategy that uses either the rhesus or the human genome to assemble sequence reads. The sixfold sequence coverage of the draft genome sequence enabled discovery of about 2.1 million potential single-nucleotide polymorphisms based on occurrence of a dimorphic nucleotide at a given position in the genome sequence. Homology-based annotation allowed us to identify 17,387 orthologs of human protein-coding genes in the M. fascicularis draft genome, and the predicted transcripts enabled the design of a M. fascicularis-specific gene expression microarray. Using liver samples from 36 individuals of different geographic origin we identified 718 genes with highly variable expression in liver, whereas the majority of the transcriptome shows relatively stable and comparable expression. Knowledge of the M. fascicularis draft genome is an important contribution to both the use of this animal in disease models and the safety assessment of drugs and their metabolites. In particular, this information allows high-resolution genotyping and microarray-based gene-expression profiling for animal stratification, thereby allowing the use of well-characterized animals for safety testing. Finally, the genome sequence presented here is a significant contribution to the global "3R" animal welfare initiative, which has the goal to reduce, refine, and replace animal experiments.},
- author = {Ebeling, Martin and K{\"{u}}ng, Erich and See, Angela and Broger, Clemens and Steiner, Guido and Berrera, Marco and Heckel, Tobias and Iniguez, Leonardo and Albert, Thomas and Schmucki, Roland and Biller, Hermann and Singer, Thomas and Certa, Ulrich},
- doi = {10.1101/gr.123117.111},
- file = {:Users/ryan/Documents/Mendeley Desktop/Ebeling et al. - 2011 - Genome-based analysis of the nonhuman primate Macaca fascicularis as a model for drug safety assessment(2).pdf:pdf;:Users/ryan/Documents/Mendeley Desktop/Ebeling et al. - 2011 - Genome-based analysis of the nonhuman primate Macaca fascicularis as a model for drug safety assessment(3).pdf:pdf},
- issn = {1549-5469},
- journal = {Genome Research},
- keywords = {Animal,Animals,Cytochrome P-450 Enzyme System,Cytochrome P-450 Enzyme System: genetics,Cytokines,Cytokines: genetics,DNA,DNA: genetics,DNA: isolation {\&} purification,Drug Evaluation,Female,Gene Expression Profiling,Gene Expression Profiling: methods,Genetic,Genome,High-Throughput Nucleotide Sequencing,Humans,Liver,Liver: metabolism,Macaca fascicularis,Macaca fascicularis: genetics,Models,Nucleic Acid,Oligonucleotide Array Sequence Analysis,Oligonucleotide Array Sequence Analysis: methods,Organic Anion Transporters,Organic Anion Transporters: genetics,Phylogeny,Polymorphism,Preclinical,Sequence Analysis,Sequence Homology,Single Nucleotide,Transcription,cyno-genome,cyno-project},
- mendeley-tags = {cyno-genome,cyno-project},
- month = {oct},
- number = {10},
- pages = {1746--1756},
- pmid = {21862625},
- title = {{Genome-based analysis of the nonhuman primate Macaca fascicularis as a model for drug safety assessment}},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=3202291{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {21},
- year = {2011}
- }
- @article{Zhou2013a,
- abstract = {A popular approach for comparing gene expression levels between (replicated) conditions of RNA sequencing data relies on counting reads that map to features of interest. Within such count-based methods, many flexible and advanced statistical approaches now exist and offer the ability to adjust for covariates (e.g., batch effects). Often, these methods include some sort of (sharing of information) across features to improve inferences in small samples. It is important to achieve an appropriate tradeoff between statistical power and protection against outliers. Here, we study the robustness of existing approaches for count-based differential expression analysis and propose a new strategy based on observation weights that can be used within existing frameworks. The results suggest that outliers can have a global effect on differential analyses. We demonstrate the effectiveness of our new approach with real data and simulated data that reflects properties of real datasets (e.g., dispersion-mean trend) and develop an extensible framework for comprehensive testing of current and future methods. In addition, we explore the origin of such outliers, in some cases highlighting additional biological or technical factors within the experiment. Further details can be downloaded from the project website: http://imlspenticton.uzh.ch/robinson{\_}lab/edgeR{\_}robust/},
- archivePrefix = {arXiv},
- arxivId = {1312.3382},
- author = {Zhou, Xiaobei and Lindsay, Helen and Robinson, Mark D},
- eprint = {1312.3382},
- file = {:Users/ryan/Documents/Mendeley Desktop/Zhou, Lindsay, Robinson - 2013 - Robustly detecting differential expression in RNA sequencing data using observation weights.pdf:pdf},
- journal = {arXiv},
- month = {dec},
- title = {{Robustly detecting differential expression in RNA sequencing data using observation weights}},
- url = {http://arxiv.org/abs/1312.3382},
- year = {2013}
- }
- @article{Wu2012a,
- abstract = {Recent developments in RNA-sequencing (RNA-seq) technology have led to a rapid increase in gene expression data in the form of counts. RNA-seq can be used for a variety of applications, however, identifying differential expression (DE) remains a key task in functional genomics. There have been a number of statistical methods for DE detection for RNA-seq data. One common feature of several leading methods is the use of the negative binomial (Gamma-Poisson mixture) model. That is, the unobserved gene expression is modeled by a gamma random variable and, given the expression, the sequencing read counts are modeled as Poisson. The distinct feature in various methods is how the variance, or dispersion, in the Gamma distribution is modeled and estimated. We evaluate several large public RNA-seq datasets and find that the estimated dispersion in existing methods does not adequately capture the heterogeneity of biological variance among samples. We present a new empirical Bayes shrinkage estimate of the dispersion parameters and demonstrate improved DE detection.},
- author = {Wu, Hao and Wang, Chi and Wu, Zhijin},
- doi = {10.1093/biostatistics/kxs033},
- file = {:Users/ryan/Documents/Mendeley Desktop/Wu, Wang, Wu - 2013 - A new shrinkage estimator for dispersion improves differential expression detection in RNA-seq data(4).pdf:pdf;:Users/ryan/Documents/Mendeley Desktop/Wu, Wang, Wu - 2013 - A new shrinkage estimator for dispersion improves differential expression detection in RNA-seq data.pdf:pdf},
- issn = {1468-4357},
- journal = {Biostatistics},
- keywords = {Bayes Theorem,Binomial Distribution,Biostatistics,Databases,Gene Expression Profiling,Gene Expression Profiling: statistics {\&} numerical data,Humans,Models,Nucleic Acid,Nucleic Acid: statistics {\&} numerical data,Poisson Distribution,RNA,RNA: statistics {\&} numerical data,Sequence Analysis,Statistical},
- month = {apr},
- number = {2},
- pages = {232--243},
- pmid = {23001152},
- title = {{A new shrinkage estimator for dispersion improves differential expression detection in RNA-seq data}},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=3590927{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {14},
- year = {2013}
- }
- @article{Wu2010c,
- abstract = {BACKGROUND: Recent technological advancements have made high throughput sequencing an increasingly popular approach for transcriptome analysis. Advantages of sequencing-based transcriptional profiling over microarrays have been reported, including lower technical variability. However, advances in technology do not remove biological variation between replicates and this variation is often neglected in many analyses.
- RESULTS: We propose an empirical Bayes method, titled Analysis of Sequence Counts (ASC), to detect differential expression based on sequencing technology. ASC borrows information across sequences to establish prior distribution of sample variation, so that biological variation can be accounted for even when replicates are not available. Compared to current approaches that simply tests for equality of proportions in two samples, ASC is less biased towards highly expressed sequences and can identify more genes with a greater log fold change at lower overall abundance.
- CONCLUSIONS: ASC unifies the biological and statistical significance of differential expression by estimating the posterior mean of log fold change and estimating false discovery rates based on the posterior mean. The implementation in R is available at http://www.stat.brown.edu/Zwu/research.aspx.},
- author = {Wu, Zhijin and Jenkins, Bethany D and Rynearson, Tatiana A and Dyhrman, Sonya T and Saito, Mak A and Mercier, Melissa and Whitney, Leann P},
- doi = {10.1186/1471-2105-11-564},
- file = {:Users/ryan/Documents/Mendeley Desktop/Wu et al. - 2010 - Empirical bayes analysis of sequencing-based transcriptional profiling without replicates.pdf:pdf},
- issn = {1471-2105},
- journal = {BMC Bioinformatics},
- keywords = {Bayes Theorem,Databases, Genetic,Gene Expression Profiling,Gene Expression Profiling: methods,Genomics,Oligonucleotide Array Sequence Analysis,Oligonucleotide Array Sequence Analysis: methods,Sequence Analysis, DNA,Sequence Analysis, RNA},
- month = {jan},
- number = {1},
- pages = {564},
- pmid = {21080965},
- publisher = {BioMed Central Ltd},
- title = {{Empirical Bayes analysis of sequencing-based transcriptional profiling without replicates}},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=3098101{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {11},
- year = {2010}
- }
- @article{Ackermann2009a,
- abstract = {BACKGROUND: Analysis of microarray and other high-throughput data on the basis of gene sets, rather than individual genes, is becoming more important in genomic studies. Correspondingly, a large number of statistical approaches for detecting gene set enrichment have been proposed, but both the interrelations and the relative performance of the various methods are still very much unclear.
- RESULTS: We conduct an extensive survey of statistical approaches for gene set analysis and identify a common modular structure underlying most published methods. Based on this finding we propose a general framework for detecting gene set enrichment. This framework provides a meta-theory of gene set analysis that not only helps to gain a better understanding of the relative merits of each embedded approach but also facilitates a principled comparison and offers insights into the relative interplay of the methods.
- CONCLUSION: We use this framework to conduct a computer simulation comparing 261 different variants of gene set enrichment procedures and to analyze two experimental data sets. Based on the results we offer recommendations for best practices regarding the choice of effective procedures for gene set enrichment analysis.},
- author = {Ackermann, Marit and Strimmer, Korbinian},
- doi = {10.1186/1471-2105-10-47},
- file = {:Users/ryan/Documents/Mendeley Desktop/Ackermann, Strimmer - 2009 - A general modular framework for gene set enrichment analysis.pdf:pdf},
- issn = {1471-2105},
- journal = {BMC Bioinformatics},
- keywords = {Algorithms,Animals,Computer Simulation,Databases, Genetic,Humans,Models, Statistical,Oligonucleotide Array Sequence Analysis,Oligonucleotide Array Sequence Analysis: methods},
- month = {jan},
- pages = {47},
- pmid = {19192285},
- title = {{A general modular framework for gene set enrichment analysis}},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=2661051{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {10},
- year = {2009}
- }
- @incollection{Houseman2015,
- address = {Dordrecht},
- author = {Houseman, E. Andr{\'{e}}s},
- booktitle = {Computational and Statistical Epigenomics},
- doi = {10.1007/978-94-017-9927-0_2},
- editor = {Teschendorff, Andrew E.},
- file = {:Users/ryan/Documents/Mendeley Desktop/Houseman - 2015 - DNA Methylation and Cell-Type Distribution.pdf:pdf},
- isbn = {978-94-017-9926-3},
- keywords = {Cell composition,Confounding,DMP,DMR,Immune,Mediation},
- pages = {35--50},
- publisher = {Springer Netherlands},
- series = {Translational Bioinformatics},
- title = {{DNA Methylation and Cell-Type Distribution}},
- url = {http://link.springer.com/10.1007/978-94-017-9927-0{\_}2},
- volume = {7},
- year = {2015}
- }
- @article{Rashid2011,
- abstract = {ZINBA (Zero-Inflated Negative Binomial Algorithm) identifies genomic regions enriched in a variety of ChIP-seq and related next-generation sequencing experiments (DNA-seq), calling both broad and narrow modes of enrichment across a range of signal-to-noise ratios. ZINBA models and accounts for factors that co-vary with background or experimental signal, such as G/C content, and identifies enrichment in genomes with complex local copy number variations. ZINBA provides a single unified framework for analyzing DNA-seq experiments in challenging genomic contexts.},
- author = {Rashid, Naim U and Giresi, Paul G and Ibrahim, Joseph G and Sun, Wei and Lieb, Jason D},
- doi = {10.1186/gb-2011-12-7-r67},
- file = {:Users/ryan/Documents/Mendeley Desktop/Rashid et al. - 2011 - ZINBA integrates local covariates with DNA-seq data to identify broad and narrow regions of enrichment, even with.pdf:pdf},
- issn = {1465-6914},
- journal = {Genome Biology},
- keywords = {Algorithms,Computer Simulation,DNA Copy Number Variations,Genomics,Genomics: methods,Models, Genetic,Models, Statistical,Sequence Analysis, DNA,Software},
- month = {jan},
- number = {7},
- pages = {R67},
- pmid = {21787385},
- publisher = {BioMed Central Ltd},
- title = {{ZINBA integrates local covariates with DNA-seq data to identify broad and narrow regions of enrichment, even within amplified genomic regions}},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=3218829{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {12},
- year = {2011}
- }
- @article{McCall2011,
- abstract = {Robust multiarray analysis (RMA) is the most widely used preprocessing algorithm for Affymetrix and Nimblegen gene expression microarrays. RMA performs background correction, normalization, and summarization in a modular way. The last 2 steps require multiple arrays to be analyzed simultaneously. The ability to borrow information across samples provides RMA various advantages. For example, the summarization step fits a parametric model that accounts for probe effects, assumed to be fixed across arrays, and improves outlier detection. Residuals, obtained from the fitted model, permit the creation of useful quality metrics. However, the dependence on multiple arrays has 2 drawbacks: (1) RMA cannot be used in clinical settings where samples must be processed individually or in small batches and (2) data sets preprocessed separately are not comparable. We propose a preprocessing algorithm, frozen RMA (fRMA), which allows one to analyze microarrays individually or in small batches and then combine the data for analysis. This is accomplished by utilizing information from the large publicly available microarray databases. In particular, estimates of probe-specific effects and variances are precomputed and frozen. Then, with new data sets, these are used in concert with information from the new arrays to normalize and summarize the data. We find that fRMA is comparable to RMA when the data are analyzed as a single batch and outperforms RMA when analyzing multiple batches. The methods described here are implemented in the R package fRMA and are currently available for download from the software section of http://rafalab.jhsph.edu.},
- author = {McCall, Matthew N and Bolstad, Benjamin M and Irizarry, Rafael A},
- doi = {10.1093/biostatistics/kxp059},
- file = {:Users/ryan/Documents/Mendeley Desktop/McCall, Bolstad, Irizarry - 2010 - Frozen robust multiarray analysis (fRMA).pdf:pdf},
- issn = {1468-4357},
- journal = {Biostatistics},
- month = {apr},
- number = {2},
- pages = {242--253},
- pmid = {20097884},
- title = {{Frozen robust multiarray analysis (fRMA)}},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/20097884},
- volume = {11},
- year = {2010}
- }
- @article{Collado-Torres2016,
-   author = {Collado-Torres, Leonardo and Nellore, Abhinav and Kammers, Kai and Ellis, Shannon E and Taub, Margaret A and Hansen, Kasper D and Jaffe, Andrew E and Langmead, Ben and Leek, Jeffrey},
-   title = {{recount: A large-scale resource of analysis-ready RNA-seq expression data}},
-   journal = {bioRxiv},
-   year = {2016},
-   pages = {068478},
-   doi = {10.1101/068478},
-   url = {http://biorxiv.org/content/biorxiv/early/2016/08/08/068478.full.pdf},
-   file = {:Users/ryan/Documents/Mendeley Desktop/Collado-Torres et al. - 2016 - recount A large-scale resource of analysis-ready RNA-seq expression data.pdf:pdf},
-   abstract = {recount is a resource of processed and summarized expression data spanning nearly 60,000 human RNA-seq samples from the Sequence Read Archive (SRA). The associated recount Bioconductor package provides a convenient API for querying, downloading, and analyzing the data. Each processed study consists of meta/phenotype data, the expression levels of genes and their underlying exons and splice junctions, and corresponding genomic annotation. We also provide data summarization types for quantifying novel transcribed sequence including base-resolution coverage and potentially unannotated splice junctions. We present workflows illustrating how to use recount to perform differential expression analysis including meta-analysis, annotation-free base-level analysis, and replication of smaller studies using data from larger studies. recount provides a valuable and user-friendly resource of processed RNA-seq datasets to draw additional biological insights from existing public data. The resource is available at https://jhubiostatistics.shinyapps.io/recount/.},
- }
- @article{Jiao2014,
- abstract = {Motivation: There is a growing number of studies generating matched Illumina Infinium HumanMethylation450 and gene expression data, yet there is a corresponding shortage of statistical tools aimed at their integrative analysis. Such integrative tools are important for the discovery of epigenetically regulated gene modules or molecular pathways, which play key roles in cellular differentiation and disease. Results: Here, we present a novel functional supervised algorithm, called Functional Epigenetic Modules (FEM), for the integrative analysis of Infinium 450k DNA methylation and matched or unmatched gene expression data. The algorithm identifies gene modules of coordinated differential methylation and differential expression in the context of a human interactome. We validate the FEM algorithm on simulated and real data, demonstrating how it successfully retrieves an epigenetically deregulated gene, previously known to drive endometrial cancer development. Importantly, in the same cancer, FEM identified a novel epigenetically deregulated hotspot, directly upstream of the well-known progesterone receptor tumour suppressor pathway. In the context of cellular differentiation, FEM successfully identifies known endothelial cell subtype-specific gene expression markers, as well as a novel gene module whose overexpression in blood endothelial cells is mediated by DNA hypomethylation. The systems-level integrative framework presented here could be used to identify novel key genes or signalling pathways, which drive cellular differentiation or disease through an underlying epigenetic mechanism. Availability and implementation: FEM is freely available as an R-package from http://sourceforge.net/projects/funepimod. Contact: andrew@picb.ac.cn. Supplementary information: Supplementary data are available at Bioinformatics online.},
- author = {Jiao, Yinming and Widschwendter, Martin and Teschendorff, Andrew E.},
- doi = {10.1093/bioinformatics/btu316},
- file = {:Users/ryan/Documents/Mendeley Desktop/Jiao, Widschwendter, Teschendorff - 2014 - A systems-level integrative framework for genome-wide DNA methylation and gene expression dat.pdf:pdf},
- issn = {1460-2059},
- journal = {Bioinformatics},
- number = {16},
- pages = {2360--2366},
- pmid = {24794928},
- title = {{A systems-level integrative framework for genome-wide DNA methylation and gene expression data identifies differential gene expression modules under epigenetic control}},
- url = {http://bioinformatics.oxfordjournals.org/content/30/16/2360.full.pdf},
- volume = {30},
- year = {2014}
- }
- @article{Peng2012,
- author = {Peng, Ze and Zhao, Zhiying and Nath, Nandita and Froula, Jeff L. and Clum, Alicia and Zhang, Tao and Cheng, Jan-Fang and Copeland, Alex C. and Pennacchio, Len A. and Chen, Feng},
- doi = {10.1371/journal.pone.0029437},
- editor = {Pellegrini, Matteo},
- file = {:Users/ryan/Documents/Mendeley Desktop/Peng et al. - 2012 - Generation of Long Insert Pairs Using a Cre-LoxP Inverse PCR Approach.pdf:pdf},
- issn = {1932-6203},
- journal = {PLoS ONE},
- month = {jan},
- number = {1},
- pages = {e29437},
- title = {{Generation of Long Insert Pairs Using a Cre-LoxP Inverse PCR Approach}},
- url = {http://dx.plos.org/10.1371/journal.pone.0029437},
- volume = {7},
- year = {2012}
- }
- @article{10.2307/1390807,
- abstract = {In this article we discuss our experience designing and implementing a statistical computing language. In developing this new language, we sought to combine what we felt were useful features from two existing computer languages. We feel that the new language provides advantages in the areas of portability, computational efficiency, memory management, and scoping.},
- author = {Ihaka, Ross and Gentleman, Robert},
- doi = {10.2307/1390807},
- issn = {1061-8600},
- journal = {Journal of Computational and Graphical Statistics},
- number = {3},
- pages = {299--314},
- publisher = {American Statistical Association, Taylor {\&} Francis, Ltd., Institute of Mathematical Statistics, Interface Foundation of America},
- title = {{R: A Language for Data Analysis and Graphics}},
- url = {http://www.jstor.org/stable/1390807},
- volume = {5},
- year = {1996}
- }
- @article{Boos2011,
- abstract = {Cyclin-dependent kinases (CDKs) play crucial roles in promoting DNA replication and preventing rereplication in eukaryotic cells [1-4]. In budding yeast, CDKs promote DNA replication by phosphorylating two proteins, Sld2 and Sld3, which generates binding sites for pairs of BRCT repeats (breast cancer gene 1 [BRCA1] C terminal repeats) in the Dpb11 protein [5, 6]. The Sld3-Dpb11-Sld2 complex generated by CDK phosphorylation is required for the assembly and activation of the Cdc45-Mcm2-7-GINS (CMG) replicative helicase. In response to DNA replication stress, the interaction between Sld3 and Dpb11 is blocked by the checkpoint kinase Rad53 [7], which prevents late origin firing [7, 8]. Here we show that the two key CDK sites in Sld3 are conserved in the human Sld3-related protein Treslin/ticrr and are essential for DNA replication. Moreover, phosphorylation of these two sites mediates interaction with the orthologous pair of BRCT repeats in the human Dpb11 ortholog, TopBP1. Finally, we show that DNA replication stress prevents the interaction between Treslin/ticrr and TopBP1 via the Chk1 checkpoint kinase. Our results indicate that Treslin/ticrr is a genuine ortholog of Sld3 and that the Sld3-Dpb11 interaction has remained a critical nexus of S phase regulation through eukaryotic evolution.},
- author = {Boos, Dominik and Sanchez-Pulido, Luis and Rappas, Mathieu and Pearl, Laurence H and Oliver, Antony W and Ponting, Chris P and Diffley, John F X},
- doi = {10.1016/j.cub.2011.05.057},
- file = {:Users/ryan/Documents/Mendeley Desktop/Boos et al. - 2011 - Regulation of DNA replication through Sld3-Dpb11 interaction is conserved from yeast to humans.pdf:pdf},
- issn = {1879-0445},
- journal = {Current Biology},
- keywords = {Amino Acid Sequence,Cell Cycle Proteins,Cell Cycle Proteins: chemistry,Cell Cycle Proteins: metabolism,Cell Cycle Proteins: physiology,Conserved Sequence,Cyclin-Dependent Kinases,Cyclin-Dependent Kinases: chemistry,Cyclin-Dependent Kinases: physiology,DNA Replication,DNA Replication: physiology,Evolution, Molecular,Fungal Proteins,Fungal Proteins: chemistry,Fungal Proteins: metabolism,Fungal Proteins: physiology,HeLa Cells,Humans,Molecular Sequence Data,Protein Kinases,Protein Kinases: metabolism,Protein Kinases: physiology,Saccharomyces cerevisiae Proteins,Saccharomyces cerevisiae Proteins: chemistry,Saccharomyces cerevisiae Proteins: metabolism,Saccharomyces cerevisiae Proteins: physiology,Sequence Alignment,Yeasts,Yeasts: genetics},
- month = jul,
- number = {13},
- pages = {1152--1157},
- pmid = {21700459},
- publisher = {Elsevier Ltd},
- title = {{Regulation of DNA replication through Sld3-Dpb11 interaction is conserved from yeast to humans.}},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/21700459},
- volume = {21},
- year = {2011}
- }
- @article{Zaharia2011,
- abstract = {We present the Scalable Nucleotide Alignment Program (SNAP), a new short and long read aligner that is both more accurate (i.e., aligns more reads with fewer errors) and 10-100x faster than state-of-the-art tools such as BWA. Unlike recent aligners based on the Burrows-Wheeler transform, SNAP uses a simple hash index of short seed sequences from the genome, similar to BLAST's. However, SNAP greatly reduces the number and cost of local alignment checks performed through several measures: it uses longer seeds to reduce the false positive locations considered, leverages larger memory capacities to speed index lookup, and excludes most candidate locations without fully computing their edit distance to the read. The result is an algorithm that scales well for reads from one hundred to thousands of bases long and provides a rich error model that can match classes of mutations (e.g., longer indels) that today's fast aligners ignore. We calculate that SNAP can align a dataset with 30x coverage of a human genome in less than an hour for a cost of {\$}2 on Amazon EC2, with higher accuracy than BWA. Finally, we describe ongoing work to further improve SNAP.},
- archivePrefix = {arXiv},
- arxivId = {1111.5572v1},
- author = {Zaharia, Matei and Bolosky, William J. and Curtis, Kristal},
- eprint = {1111.5572v1},
- file = {:Users/ryan/Documents/Mendeley Desktop/Zaharia, Bolosky, Curtis - 2011 - Faster and More Accurate Sequence Alignment with SNAP.pdf:pdf},
- journal = {arXiv preprint arXiv:1111.5572},
- pages = {1--10},
- title = {{Faster and More Accurate Sequence Alignment with SNAP}},
- url = {http://arxiv.org/abs/1111.5572},
- year = {2011}
- }
- @inproceedings{Patel2018,
- author = {Patel, J and Heshmati, K and Rogers, A and Wali, R and Jonsson, J and Liu, C and Emery, E and Collins, D and Karzai, S and Piper, J},
- booktitle = {2018 American Transplant Congress},
- title = {{Determining the Utility of Protocol Biopsies in Kidney Transplant Recipients [abstract].}},
- url = {https://atcmeetingabstracts.com/abstract/determining-the-utility-of-protocol-biopsies-in-kidney-transplant-recipients/},
- year = {2018}
- }
- @article{Bair2004,
- abstract = {An important goal of DNA microarray research is to develop tools to diagnose cancer more accurately based on the genetic profile of a tumor. There are several existing techniques in the literature for performing this type of diagnosis. Unfortunately, most of these techniques assume that different subtypes of cancer are already known to exist. Their utility is limited when such subtypes have not been previously identified. Although methods for identifying such subtypes exist, these methods do not work well for all datasets. It would be desirable to develop a procedure to find such subtypes that is applicable in a wide variety of circumstances. Even if no information is known about possible subtypes of a certain form of cancer, clinical information about the patients, such as their survival time, is often available. In this study, we develop some procedures that utilize both the gene expression data and the clinical data to identify subtypes of cancer and use this knowledge to diagnose future patients. These procedures were successfully applied to several publicly available datasets. We present diagnostic procedures that accurately predict the survival of future patients based on the gene expression profile and survival times of previous patients. This has the potential to be a powerful tool for diagnosing and treating cancer.},
- author = {Bair, Eric and Tibshirani, Robert},
- doi = {10.1371/journal.pbio.0020108},
- file = {:Users/ryan/Documents/Mendeley Desktop/Bair, Tibshirani - 2004 - Semi-supervised methods to predict patient survival from gene expression data.pdf:pdf},
- issn = {1545-7885},
- journal = {PLoS Biology},
- keywords = {Breast Neoplasms,Breast Neoplasms: metabolism,Breast Neoplasms: mortality,Cluster Analysis,Computer Simulation,Data Interpretation, Statistical,Databases, Factual,Gene Expression Profiling,Humans,Models, Statistical,Neoplasms,Neoplasms: metabolism,Neoplasms: mortality,Oligonucleotide Array Sequence Analysis,Oligonucleotide Array Sequence Analysis: methods,Principal Component Analysis,Prognosis,Software,Time Factors,Treatment Outcome},
- month = apr,
- number = {4},
- pages = {E108},
- pmid = {15094809},
- title = {{Semi-supervised methods to predict patient survival from gene expression data.}},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=387275{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {2},
- year = {2004}
- }
- @article{Efron1983a,
- abstract = {This is an invited expository article for The American Statistician. It reviews the nonparametric estimation of statistical error, mainly the bias and standard error of an estimator, or the error rate of a prediction rule. The presentation is written at a relaxed mathematical level, omitting most proofs, regularity conditions, and technical details.},
- author = {Efron, Bradley and Gong, Gail},
- doi = {10.1080/00031305.1983.10483087},
- file = {:Users/ryan/Documents/Mendeley Desktop/Efron, Gong - 1983 - A leisurely look at the bootstrap, the jackknife, and cross-validation.pdf:pdf},
- isbn = {08834237},
- issn = {0003-1305},
- journal = {The American Statistician},
- keywords = {bias estimation,error rate prediction,nonparametric confidence intervals,nonparametric standard errors,variance estimation},
- number = {1},
- pages = {36--48},
- pmid = {2685844},
- title = {{A leisurely look at the bootstrap, the jackknife, and cross-validation}},
- url = {http://www.rochester.edu/College/psc/clarke/405/EfronGong.pdf},
- volume = {37},
- year = {1983}
- }
- @article{Stanke2003,
- author = {Stanke, M. and Waack, S.},
- doi = {10.1093/bioinformatics/btg1080},
- file = {:Users/ryan/Documents/Mendeley Desktop/Stanke, Waack - 2003 - Gene prediction with a hidden Markov model and a new intron submodel.pdf:pdf},
- issn = {1367-4803},
- journal = {Bioinformatics},
- month = oct,
- number = {Suppl 2},
- pages = {ii215--ii225},
- title = {{Gene prediction with a hidden Markov model and a new intron submodel}},
- url = {http://bioinformatics.oxfordjournals.org/cgi/doi/10.1093/bioinformatics/btg1080},
- volume = {19},
- year = {2003}
- }
- @article{Zhu2018,
- abstract = {In RNA-seq differential expression analysis, investigators aim to detect those genes with changes in expression level across experimental conditions, despite technical and biological variability in the observations. A common task is to accurately estimate the effect size, often in terms of a logarithmic fold change (LFC) in expression levels. When the counts of reads are low or highly variable in either or both conditions, the maximum likelihood estimates for the LFCs has high variance, leading to large estimates not representative of true differences, and poor ranking of genes by effect size. One approach is to introduce filtering thresholds and pseudocounts to exclude or moderate estimated LFCs. Filtering may result in a loss of genes from the analysis with true differences in expression across conditions, while pseudocounts provide a limited solution that needs to be adapted per dataset. Here, we propose the use of a heavy-tailed Cauchy prior distribution for effect sizes, which avoids the use of filter thresholds or pseudocounts. The proposed method, Approximate Posterior Estimation for GLM, apeglm, has lower bias than previously proposed shrinkage estimators, while still reducing variance for those genes with little information for statistical inference. The apeglm package is available as an R/Bioconductor package at http://bioconductor.org/packages/apeglm, and the methods can be called from within the DESeq2 software.},
- author = {Zhu, Anqi and Ibrahim, Joseph G. and Love, Michael I},
- doi = {10.1101/303255},
- file = {:Users/ryan/Documents/Mendeley Desktop/Zhu, Ibrahim, Love - 2018 - Heavy-tailed prior distributions for sequence count data removing the noise and preserving large differences.pdf:pdf},
- issn = {1367-4803},
- journal = {bioRxiv},
- keywords = {effect size,empirical bayes,hierarchical model,log fold change,rna sequencing,rna-seq,shrinkage estimation,statistical method},
- pages = {303255},
- title = {{Heavy-tailed prior distributions for sequence count data: removing the noise and preserving large differences}},
- url = {https://www.biorxiv.org/content/early/2018/04/17/303255},
- year = {2018}
- }
- @article{GallegoRomero2014,
- abstract = {BACKGROUND: The use of low quality RNA samples in whole-genome gene expression profiling remains controversial. It is unclear if transcript degradation in low quality RNA samples occurs uniformly, in which case the effects of degradation can be corrected via data normalization, or whether different transcripts are degraded at different rates, potentially biasing measurements of expression levels. This concern has rendered the use of low quality RNA samples in whole-genome expression profiling problematic. Yet, low quality samples (for example, samples collected in the course of fieldwork) are at times the sole means of addressing specific questions. RESULTS: We sought to quantify the impact of variation in RNA quality on estimates of gene expression levels based on RNA-seq data. To do so, we collected expression data from tissue samples that were allowed to decay for varying amounts of time prior to RNA extraction. The RNA samples we collected spanned the entire range of RNA Integrity Number (RIN) values (a metric commonly used to assess RNA quality). We observed widespread effects of RNA quality on measurements of gene expression levels, as well as a slight but significant loss of library complexity in more degraded samples. CONCLUSIONS: While standard normalizations failed to account for the effects of degradation, we found that by explicitly controlling for the effects of RIN using a linear model framework we can correct for the majority of these effects. We conclude that in instances in which RIN and the effect of interest are not associated, this approach can help recover biologically meaningful signals in data from degraded RNA samples.},
- author = {{Gallego Romero}, Irene and Pai, Athma A and Tung, Jenny and Gilad, Yoav},
- doi = {10.1186/1741-7007-12-42},
- file = {:Users/ryan/Documents/Mendeley Desktop/Gallego Romero et al. - 2014 - RNA-seq impact of RNA degradation on transcript quantification.pdf:pdf},
- isbn = {1741-7007},
- issn = {1741-7007},
- journal = {BMC Biology},
- keywords = {Gene Expression Profiling,Genes,Humans,Molecular Sequence Annotation,Principal Component Analysis,RNA Stability,RNA Stability: genetics,RNA, Messenger,RNA, Messenger: genetics,RNA, Messenger: metabolism,Sequence Analysis, RNA,Sequence Analysis, RNA: methods,Statistics, Nonparametric},
- number = {1},
- pages = {42},
- pmid = {24885439},
- title = {{RNA-seq: impact of RNA degradation on transcript quantification.}},
- url = {http://bmcbiol.biomedcentral.com/articles/10.1186/1741-7007-12-42},
- volume = {12},
- year = {2014}
- }
- @article{Law2014,
- abstract = {New normal linear modeling strategies are presented for analyzing read counts from RNA-seq experiments. The voom method estimates the mean-variance relationship of the log-counts, generates a precision weight for each observation and enters these into the limma empirical Bayes analysis pipeline. This opens access for RNA-seq analysts to a large body of methodology developed for microarrays. Simulation studies show that voom performs as well or better than count-based RNA-seq methods even when the data are generated according to the assumptions of the earlier methods. Two case studies illustrate the use of linear modeling and gene set testing methods.},
- author = {Law, Charity W. and Chen, Yunshun and Shi, Wei and Smyth, Gordon K.},
- doi = {10.1186/gb-2014-15-2-r29},
- file = {:Users/ryan/Documents/Mendeley Desktop/Law et al. - 2014 - voom precision weights unlock linear model analysis tools for RNA-seq read counts.pdf:pdf},
- issn = {1465-6906},
- journal = {Genome Biology},
- number = {2},
- pages = {R29},
- title = {{voom: precision weights unlock linear model analysis tools for RNA-seq read counts}},
- url = {http://genomebiology.biomedcentral.com/articles/10.1186/gb-2014-15-2-r29},
- volume = {15},
- year = {2014}
- }
- @article{Robinson2007,
- abstract = {Digital gene expression (DGE) technologies measure gene expression by counting sequence tags. They are sensitive technologies for measuring gene expression on a genomic scale, without the need for prior knowledge of the genome sequence. As the cost of sequencing DNA decreases, the number of DGE datasets is expected to grow dramatically. Various tests of differential expression have been proposed for replicated DGE data using binomial, Poisson, negative binomial or pseudo-likelihood (PL) models for the counts, but none of the these are usable when the number of replicates is very small.},
- author = {Robinson, Mark D and Smyth, Gordon K},
- doi = {10.1093/bioinformatics/btm453},
- file = {:Users/ryan/Documents/Mendeley Desktop/Robinson, Smyth - 2007 - Moderated statistical tests for assessing differences in tag abundance.pdf:pdf},
- issn = {1367-4811},
- journal = {Bioinformatics},
- keywords = {Algorithms,Computer Simulation,Data Interpretation, Statistical,Expressed Sequence Tags,Gene Expression Profiling,Gene Expression Profiling: methods,Likelihood Functions,Models, Genetic,Models, Statistical,Poisson Distribution,Sequence Analysis, DNA,Sequence Analysis, DNA: methods,Signal Processing, Computer-Assisted},
- month = nov,
- number = {21},
- pages = {2881--2887},
- pmid = {17881408},
- title = {{Moderated statistical tests for assessing differences in tag abundance.}},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/17881408},
- volume = {23},
- year = {2007}
- }
- @article{Blume2018,
- abstract = {Verifying that a statistically significant result is scientifically meaningful is not only good scientific practice, it is a natural way to control the Type I error rate. Here we introduce a novel extension of the p-value -- a second-generation p-value ($p_\delta$) -- that formally accounts for scientific relevance and leverages this natural Type I Error control. The approach relies on a pre-specified interval null hypothesis that represents the collection of effect sizes that are scientifically uninteresting or are practically null. The second-generation p-value is the proportion of data-supported hypotheses that are also null hypotheses. As such, second-generation p-values indicate when the data are compatible with null hypotheses ($p_\delta = 1$), or with alternative hypotheses ($p_\delta = 0$), or when the data are inconclusive (0 {\textless} $p_\delta$ {\textless} 1). Moreover, second-generation p-values provide a proper scientific adjustment for multiple comparisons and reduce false discovery rates. This is an advance for environments rich in data, where traditional p-value adjustments are needlessly punitive. Second-generation p-values promote transparency, rigor and reproducibility of scientific results by a priori specifying which candidate hypotheses are practically meaningful and by providing a more reliable statistical summary of when the data are compatible with alternative or null hypotheses.},
- archivePrefix = {arXiv},
- arxivId = {1709.09333},
- author = {Blume, Jeffrey D. and {D'Agostino McGowan}, Lucy and Dupont, William D. and Greevy, Robert A.},
- doi = {10.1371/journal.pone.0188299},
- editor = {Smalheiser, Neil R.},
- eprint = {1709.09333},
- file = {:Users/ryan/Documents/Mendeley Desktop/Blume et al. - 2018 - Second-generation p-values Improved rigor, reproducibility, {\&}amp transparency in statistical analyses.pdf:pdf},
- issn = {1932-6203},
- journal = {PLOS ONE},
- month = mar,
- number = {3},
- pages = {e0188299},
- title = {{Second-generation p-values: Improved rigor, reproducibility, {\&} transparency in statistical analyses}},
- url = {http://dx.plos.org/10.1371/journal.pone.0188299},
- volume = {13},
- year = {2018}
- }
- @article{Robinson2010,
- abstract = {The fine detail provided by sequencing-based transcriptome surveys suggests that RNA-seq is likely to become the platform of choice for interrogating steady state RNA. In order to discover biologically important changes in expression, we show that normalization continues to be an essential step in the analysis. We outline a simple and effective method for performing normalization and show dramatically improved results for inferring differential expression in simulated and publicly available data sets.},
- author = {Robinson, Mark D and Oshlack, Alicia},
- doi = {10.1186/gb-2010-11-3-r25},
- file = {:Users/ryan/Documents/Mendeley Desktop/Robinson, Oshlack - 2010 - A scaling normalization method for differential expression analysis of RNA-seq data.pdf:pdf},
- issn = {1465-6906},
- journal = {Genome Biology},
- keywords = {Base Sequence,Base Sequence: genetics,Computer Simulation,Gene Expression Profiling,Gene Expression Profiling: methods,Gene Library,Models,RNA,RNA: genetics,Statistical},
- month = jan,
- number = {3},
- pages = {R25},
- pmid = {20196867},
- title = {{A scaling normalization method for differential expression analysis of RNA-seq data}},
- url = {http://genomebiology.biomedcentral.com/articles/10.1186/gb-2010-11-3-r25},
- volume = {11},
- year = {2010}
- }
- @article{Purdom2008,
- abstract = {MOTIVATION: Analyses of EST data show that alternative splicing is much more widespread than once thought. The advent of exon and tiling microarrays means that researchers now have the capacity to experimentally measure alternative splicing on a genome wide level. New methods are needed to analyze the data from these arrays. RESULTS: We present a method, finding isoforms using robust multichip analysis (FIRMA), for detecting differential alternative splicing in exon array data. FIRMA has been developed for Affymetrix exon arrays, but could in principle be extended to other exon arrays, tiling arrays or splice junction arrays. We have evaluated the method using simulated data, and have also applied it to two datasets: a panel of 11 human tissues and a set of 10 pairs of matched normal and tumor colon tissue. FIRMA is able to detect exons in several genes confirmed by reverse transcriptase PCR. AVAILABILITY: R code implementing our methods is contributed to the package aroma.affymetrix.},
- author = {Purdom, E and Simpson, K M and Robinson, M D and Conboy, J G and Lapuk, A V and Speed, T P},
- doi = {10.1093/bioinformatics/btn284},
- file = {:Users/ryan/Documents/Mendeley Desktop/Purdom et al. - 2008 - FIRMA a method for detection of alternative splicing from exon array data.pdf:pdf},
- issn = {1367-4811},
- journal = {Bioinformatics},
- keywords = {Algorithms,Chromosome Mapping,Chromosome Mapping: methods,DNA,DNA: methods,Databases, Genetic,Expressed Sequence Tags,Genetic,Oligonucleotide Array Sequence Analysis,Oligonucleotide Array Sequence Analysis: methods,RNA Splice Sites,RNA Splice Sites: genetics,Reverse Transcriptase Polymerase Chain Reaction,Reverse Transcriptase Polymerase Chain Reaction: m,Sequence Analysis, DNA,Sequence Analysis, DNA: methods},
- month = aug,
- number = {15},
- pages = {1707--1714},
- pmid = {18573797},
- title = {{FIRMA: a method for detection of alternative splicing from exon array data.}},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=2638867{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {24},
- year = {2008}
- }
- @article{Moreno-Hagelsieb2008,
- abstract = {The analyses of the increasing number of genome sequences requires shortcuts for the detection of orthologs, such as Reciprocal Best Hits (RBH), where orthologs are assumed if two genes each in a different genome find each other as the best hit in the other genome. Two BLAST options seem to affect alignment scores the most, and thus the choice of a best hit: the filtering of low information sequence segments and the algorithm used to produce the final alignment. Thus, we decided to test whether such options would help better detect orthologs.},
- author = {Moreno-Hagelsieb, Gabriel and Latimer, Kristen},
- doi = {10.1093/bioinformatics/btm585},
- file = {:Users/ryan/Documents/Mendeley Desktop/Moreno-Hagelsieb, Latimer - 2008 - Choosing BLAST options for better detection of orthologs as reciprocal best hits.pdf:pdf},
- issn = {1367-4811},
- journal = {Bioinformatics},
- keywords = {Base Sequence,Chromosome Mapping,Chromosome Mapping: methods,DNA, Bacterial,DNA, Bacterial: genetics,Database Management Systems,Databases, Genetic,Escherichia coli,Escherichia coli: genetics,Molecular Sequence Data,Proteins,Proteins: genetics,Sequence Alignment,Sequence Alignment: methods,Sequence Analysis, DNA,Sequence Analysis, DNA: methods,Sequence Homology, Nucleic Acid},
- month = feb,
- number = {3},
- pages = {319--324},
- pmid = {18042555},
- title = {{Choosing BLAST options for better detection of orthologs as reciprocal best hits.}},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/18042555},
- volume = {24},
- year = {2008}
- }
- @article{Berard2002,
- abstract = {Mature T cells are produced in the thymus and released into the bloodstream in low numbers. These cells are considered to be immunologically na{\"{i}}ve until such time as they encounter MHC-peptide complexes for which their T-cell receptors (TCR) have high affinity. Recognition of antigen in appropriate form, i.e. in association with costimulatory signals on the surface of professional antigen-presenting cells (APCs), leads to extensive T-cell proliferation and differentiation into effector cells. Once the infection has been cleared, it is no longer of benefit to the host to maintain high numbers of effector cells and most of the activated T cells die by apoptosis. However, a proportion of these cells survive, leaving the frequency of cells specific for the priming antigen much higher among memory T cells than that which existed among na{\"{i}}ve T cells. This difference in frequency makes a major contribution to the nature of the secondary response, which is typically faster and of greater magnitude than the primary response. In addition, T cells may also carry a true `memory' of a prior response to antigen, exhibiting differences from na{\"{i}}ve T cells at the single cell level. Here we provide a brief overview of the qualitative differences that have been reported to exist between na{\"{i}}ve and memory T cells and evidence that memory T cells themselves are functionally heterogeneous. PHENOTYPIC DIFFERENCES BETWEEN NA{\"{I}}VE AND MEMORY T CELLS The supposition that na{\"{i}}ve and memory T cells can be distinguished phenotypically is based on the notion that memory T cells retain a permanent imprint of having responded to antigen. Precise identification of memory T cells, however, remains problematic. Unlike B cells, T cells do not appear to mutate their antigen receptor genes during the course of an immune response. Furthermore, discrimination between effector and memory T cells is accomplished Received 2 April 2002; accepted 17 April 2002. Correspondence: David F.
Tough, The Edward Jenner Institute for Vaccine Research, Compton, Newbury, Berkshire RG20 7NN, UK. E-mail: david.tough@jenner.ac.uk on},
- author = {Berard, Marion and Tough, David F.},
- doi = {10.1046/j.1365-2567.2002.01447.x},
- file = {:Users/ryan/Documents/Mendeley Desktop/Berard, Tough - 2002 - Qualitative differences between na{\"{i}}ve and memory T cells.pdf:pdf},
- issn = {00192805},
- journal = {Immunology},
- number = {2},
- pages = {127--138},
- title = {{Qualitative differences between na{\"{i}}ve and memory T cells}},
- volume = {106},
- year = {2002}
- }
- @article{Zaher2009,
- abstract = {The overall fidelity of protein synthesis has been thought to rely on the combined accuracy of two basic processes: the aminoacylation of transfer RNAs with their cognate amino acid by the aminoacyl-tRNA synthetases, and the selection of cognate aminoacyl-tRNAs by the ribosome in cooperation with the GTPase elongation factor EF-Tu. These two processes, which together ensure the specific acceptance of a correctly charged cognate tRNA into the aminoacyl (A) site, operate before peptide bond formation. Here we report the identification of an additional mechanism that contributes to high fidelity protein synthesis after peptidyl transfer, using a well-defined in vitro bacterial translation system. In this retrospective quality control step, the incorporation of an amino acid from a non-cognate tRNA into the growing polypeptide chain leads to a general loss of specificity in the A site of the ribosome, and thus to a propagation of errors that results in abortive termination of protein synthesis.},
- author = {Zaher, Hani S and Green, Rachel},
- doi = {10.1038/nature07582},
- file = {:Users/ryan/Documents/Mendeley Desktop/Zaher, Green - 2009 - Quality control by the ribosome following peptide bond formation.pdf:pdf},
- issn = {1476-4687},
- journal = {Nature},
- keywords = {Aminoacylation,Anticodon,Anticodon: genetics,Base Pair Mismatch,Base Pair Mismatch: genetics,Biocatalysis,Codon,Codon: genetics,Escherichia coli,Escherichia coli Proteins,Escherichia coli Proteins: metabolism,Escherichia coli: enzymology,Escherichia coli: genetics,Escherichia coli: metabolism,Peptide Chain Termination, Translational,Peptide Termination Factors,Peptide Termination Factors: metabolism,Protein Biosynthesis,RNA, Transfer,RNA, Transfer: genetics,RNA, Transfer: metabolism,Ribosomes,Ribosomes: chemistry,Ribosomes: genetics,Ribosomes: metabolism,Substrate Specificity},
- month = jan,
- number = {7226},
- pages = {161--166},
- pmid = {19092806},
- publisher = {Nature Publishing Group},
- title = {{Quality control by the ribosome following peptide bond formation.}},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=2805954{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {457},
- year = {2009}
- }
- @article{Smyth2013,
- abstract = {Permutation tests are amongst the most commonly used statistical tools in modern genomic research, a process by which p-values are attached to a test statistic by randomly permuting the sample or gene labels. Yet permutation p-values published in the genomic literature are often computed incorrectly, understated by about 1/m, where m is the number of permutations. The same is often true in the more general situation when Monte Carlo simulation is used to assign p-values. Although the p-value understatement is usually small in absolute terms, the implications can be serious in a multiple testing context. The understatement arises from the intuitive but mistaken idea of using permutation to estimate the tail probability of the test statistic. We argue instead that permutation should be viewed as generating an exact discrete null distribution. The relevant literature, some of which is likely to have been relatively inaccessible to the genomic community, is reviewed and summarized. A computation strategy is developed for exact p-values when permutations are randomly drawn. The strategy is valid for any number of permutations and samples. Some simple recommendations are made for the implementation of permutation tests in practice.},
- author = {Phipson, Belinda and Smyth, Gordon K},
- doi = {10.2202/1544-6115.1585},
- file = {:Users/ryan/Documents/Mendeley Desktop/Phipson, Smyth - 2010 - Permutation P-values should never be zero calculating exact P-values when permutations are randomly drawn.pdf:pdf},
- issn = {1544-6115},
- journal = {Statistical Applications in Genetics and Molecular Biology},
- keywords = {Gene Expression Profiling,Gene Expression Profiling: methods,Genomics,Genomics: methods,Models,Probability,Statistical},
- month = jan,
- pages = {Article39},
- pmid = {21044043},
- title = {{Permutation P-values should never be zero: calculating exact P-values when permutations are randomly drawn.}},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/21044043},
- volume = {9},
- year = {2010}
- }
- @article{Zakeri2017,
- author = {Zakeri, Mohsen and Srivastava, Avi and Almodaresi, Fatemeh and Patro, Rob},
- doi = {10.1093/bioinformatics/btx262},
- file = {:Users/ryan/Documents/Mendeley Desktop/Zakeri et al. - 2017 - Improved data-driven likelihood factorizations for transcript abundance estimation.pdf:pdf},
- issn = {1367-4803},
- journal = {Bioinformatics},
- number = {14},
- pages = {i142--i151},
- title = {{Improved data-driven likelihood factorizations for transcript abundance estimation}},
- url = {https://academic.oup.com/bioinformatics/article-lookup/doi/10.1093/bioinformatics/btx262},
- volume = {33},
- year = {2017}
- }
- @article{Berge2017,
- annote = {NULL},
- author = {{Van den Berge}, Koen and Soneson, Charlotte and Robinson, Mark D and Clement, Lieven},
- file = {:Users/ryan/Documents/Mendeley Desktop/Berge et al. - 2017 - A general and powerful stage-wise testing procedure for differential expression and differential transcript usage.pdf:pdf},
- keywords = {differential expression,differential transcript usage,rna-sequencing,stage-wise testing},
- pages = {1--14},
- title = {{A general and powerful stage-wise testing procedure for differential expression and differential transcript usage}},
- url = {http://biorxiv.org/content/biorxiv/early/2017/02/16/109082.full.pdf},
- year = {2017}
- }
- @article{Leleu2010,
- abstract = {Chromatin-immunoprecipitation and sequencing (ChIP-seq) is a rapidly maturing technology that draws on the power of high-throughput short-read sequencing to decipher chromatin states with unprecedented precision and breadth. Although some aspects of the experimental protocol require careful tuning, the bottleneck currently firmly lies with the downstream data analysis. We give an overview of the better-established aspects of genome mapping and data normalization and we describe the more recent progress in peak calling and their statistical analysis and provide a brief overview of popular follow-up analyses such as genomic feature categorization and motif search.},
- author = {Leleu, Marion and Lefebvre, Gr{\'{e}}gory and Rougemont, Jacques},
- doi = {10.1093/bfgp/elq022},
- file = {:Users/ryan/Documents/Mendeley Desktop/Leleu, Lefebvre, Rougemont - 2010 - Processing and analyzing ChIP-seq data from short reads to regulatory interactions.pdf:pdf;:Users/ryan/Documents/Mendeley Desktop/Leleu, Lefebvre, Rougemont - 2010 - Processing and analyzing ChIP-seq data from short reads to regulatory interactions(2).pdf:pdf},
- issn = {2041-2649},
- journal = {Briefings in Functional Genomics},
- keywords = {Base Sequence,Bioinformatics,Chip-seq,Chromatin,Chromatin Immunoprecipitation,Chromatin Immunoprecipitation: methods,Chromatin: metabolism,Chromosome Mapping,DNA,DNA binding,DNA: methods,Genetic,Genome,High-Throughput Nucleotide Sequencing,High-Throughput Nucleotide Sequencing: methods,High-throughput sequencing,Humans,Saccharomyces cerevisiae,Saccharomyces cerevisiae: genetics,Sequence Analysis,Transcription,Transcriptional regulation},
- month = dec,
- number = {5-6},
- pages = {466--476},
- pmid = {20861161},
- title = {{Processing and analyzing ChIP-seq data: from short reads to regulatory interactions}},
- url = {https://academic.oup.com/bfg/article-lookup/doi/10.1093/bfgp/elq022},
- volume = {9},
- year = {2010}
- }
- @article{Northrup2011,
- abstract = {Behaviors observed at the cellular level such as development and acquisition of effector functions by immune cells result from transcriptional changes. The biochemical mediators of transcription are sequence-specific transcription factors (TFs), chromatin modifying enzymes, and chromatin, the complex of DNA and histone proteins. Covalent modification of DNA and histones, also termed epigenetic modification, influences the accessibility of target sequences for transcription factors on chromatin and the expression of linked genes required for immune functions. Genome-wide techniques such as ChIP-Seq have described the entire "cistrome" of transcription factors involved in specific developmental steps of B and T cells and started to define specific immune responses in terms of the binding profiles of critical effectors and epigenetic modification patterns. Current data suggest that both promoters and enhancers are prepared for action at different stages of activation by epigenetic modification through distinct transcription factors in different cells.},
- author = {Northrup, Daniel L and Zhao, Keji},
- doi = {10.1016/j.immuni.2011.06.002},
- file = {:Users/ryan/Documents/Mendeley Desktop/Northrup, Zhao - 2011 - Application of ChIP-Seq and related techniques to the study of immune function.pdf:pdf},
- issn = {1097-4180},
- journal = {Immunity},
- keywords = {Animals,Chromatin Immunoprecipitation,Chromatin Immunoprecipitation: methods,Epigenesis, Genetic,Genome,Humans,Promoter Regions, Genetic,Sequence Analysis, DNA,Sequence Analysis, DNA: methods,Transcription Factors,Transcription Factors: genetics,Transcription Factors: immunology},
- month = jun,
- number = {6},
- pages = {830--842},
- pmid = {21703538},
- publisher = {Elsevier Inc.},
- title = {{Application of ChIP-Seq and related techniques to the study of immune function}},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=3137373{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {34},
- year = {2011}
- }
- @article{Stephens2016b,
- author = {Stephens, Matthew},
- doi = {10.1101/038216},
- file = {:Users/ryan/Documents/Mendeley Desktop/Stephens - 2016 - False Discovery Rates ( FDRs ) A new deal.pdf:pdf},
- journal = {bioRxiv},
- pages = {1--12},
- title = {{False Discovery Rates: A New Deal}},
- url = {http://biorxiv.org/content/biorxiv/early/2016/01/29/038216.full.pdf},
- year = {2016}
- }
- @article{Lau2006,
- abstract = {Small noncoding RNAs regulate processes essential for cell growth and development, including mRNA degradation, translational repression, and transcriptional gene silencing (TGS). During a search for candidate mammalian factors for TGS, we purified a complex that contains small RNAs and Riwi, the rat homolog to human Piwi. The RNAs, frequently 29 to 30 nucleotides in length, are called Piwi-interacting RNAs (piRNAs), 94{\%} of which map to 100 defined ({\textless} or = 101 kb) genomic regions. Within these regions, the piRNAs generally distribute across only one genomic strand or distribute on two strands but in a divergent, nonoverlapping manner. Preparations of piRNA complex (piRC) contain rRecQ1, which is homologous to qde-3 from Neurospora, a gene implicated in silencing pathways. Piwi has been genetically linked to TGS in flies, and slicer activity cofractionates with the purified complex. These results are consistent with a gene-silencing role for piRC in mammals.},
- author = {Lau, Nelson C and Seto, Anita G and Kim, Jinkuk and Kuramochi-Miyagawa, Satomi and Nakano, Toru and Bartel, David P and Kingston, Robert E},
- doi = {10.1126/science.1130164},
- file = {:Users/ryan/Documents/Mendeley Desktop/Lau et al. - 2006 - Characterization of the piRNA complex from rat testes.pdf:pdf},
- issn = {1095-9203},
- journal = {Science},
- keywords = {Adenosine Triphosphatases,Adenosine Triphosphatases: isolation {\&} purification,Adenosine Triphosphatases: metabolism,Animals,Chromosomes, Mammalian,Conserved Sequence,DNA Helicases,DNA Helicases: isolation {\&} purification,DNA Helicases: metabolism,Gene Library,Genome,Male,Mice,Proteins,Proteins: isolation {\&} purification,Proteins: metabolism,RNA Interference,RNA, Untranslated,RNA, Untranslated: chemistry,RNA, Untranslated: genetics,RNA, Untranslated: isolation {\&} purification,RNA, Untranslated: metabolism,Rats,Rats, Sprague-Dawley,RecQ Helicases,Ribonucleoproteins,Ribonucleoproteins: chemistry,Ribonucleoproteins: isolation {\&} purification,Ribonucleoproteins: metabolism,Testis,Testis: chemistry,Transcription, Genetic},
- month = jul,
- number = {5785},
- pages = {363--367},
- pmid = {16778019},
- title = {{Characterization of the piRNA complex from rat testes}},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/16778019},
- volume = {313},
- year = {2006}
- }
- @misc{Humphreys2011,
- author = {Humphreys, David T and Muthiah, Kavitha and Thomas, Liza and Macdonald, Peter and Hayward, Chris},
- title  = {{Assessment of cardiac microRNA high throughput sequencing data sets generated from RNA of varying quality.}},
- year   = {2011},
- url    = {http://www.slideshare.net/AustralianBioinformatics/assessment-of-cardiac-mi-rna-hts-data-sets-david-t-humphreys},
- file   = {:Users/ryan/Documents/Mendeley Desktop/Humphreys et al. - 2011 - Assessment of cardiac microRNA high throughput sequencing data sets generated from RNA of varying quality.pdf:pdf}
- }
- @article{Berglund2017,
- abstract = {BACKGROUND Autologous and allogeneic adult mesenchymal stem/stromal cells (MSCs) are increasingly being investigated for treating a wide range of clinical diseases. Allogeneic MSCs are especially attractive due to their potential to provide immediate care at the time of tissue injury or disease diagnosis. The prevailing dogma has been that allogeneic MSCs are immune privileged, but there have been very few studies that control for matched or mismatched major histocompatibility complex (MHC) molecule expression and that examine immunogenicity in vivo. Studies that control for MHC expression have reported both cell-mediated and humoral immune responses to MHC-mismatched MSCs. The clinical implications of immune responses to MHC-mismatched MSCs are still unknown. Pre-clinical and clinical studies that document the MHC haplotype of donors and recipients and measure immune responses following MSC treatment are necessary to answer this critical question. CONCLUSIONS This review details what is currently known about the immunogenicity of allogeneic MSCs and suggests contemporary assays that could be utilized in future studies to appropriately identify and measure immune responses to MHC-mismatched MSCs.},
- author = {Berglund, Alix K. and Fortier, Lisa A. and Antczak, Douglas F. and Schnabel, Lauren V.},
- doi = {10.1186/s13287-017-0742-8},
- file = {:Users/ryan/Documents/Mendeley Desktop/Berglund et al. - 2017 - Immunoprivileged no more measuring the immunogenicity of allogeneic adult mesenchymal stem cells.pdf:pdf},
- issn = {1757-6512},
- journal = {Stem Cell Research {\&} Therapy},
- keywords = {Allogeneic,Cytotoxicity,ELISPOT,Immunogenicity,Major histocompatibility complex,Mesenchymal stem cell,Microcytotoxicity,Mixed leukocyte reaction},
- month = dec,
- number = {1},
- pages = {288},
- publisher = {BioMed Central},
- title = {{Immunoprivileged no more: measuring the immunogenicity of allogeneic adult mesenchymal stem cells}},
- url = {https://stemcellres.biomedcentral.com/articles/10.1186/s13287-017-0742-8},
- volume = {8},
- year = {2017}
- }
- @article{Aggarwal2005,
- abstract = {Mesenchymal stem cells (MSCs) are multipotent cells found in several adult tissues. Transplanted allogeneic MSCs can be detected in recipients at extended time points, indicating a lack of immune recognition and clearance. As well, a role for bone marrow-derived MSCs in reducing the incidence and severity of graft-versus-host disease (GVHD) during allogeneic transplantation has recently been reported; however, the mechanisms remain to be investigated. We examined the immunomodulatory functions of human MSCs (hMSCs) by coculturing them with purified subpopulations of immune cells and report here that hMSCs altered the cytokine secretion profile of dendritic cells (DCs), naive and effector T cells (T helper 1 [T(H)1] and T(H)2), and natural killer (NK) cells to induce a more anti-inflammatory or tolerant phenotype. Specifically, the hMSCs caused mature DCs type 1 (DC1) to decrease tumor necrosis factor alpha (TNF-alpha) secretion and mature DC2 to increase interleukin-10 (IL-10) secretion; hMSCs caused T(H)1 cells to decrease interferon gamma (IFN-gamma) and caused the T(H)2 cells to increase secretion of IL-4; hMSCs caused an increase in the proportion of regulatory T cells (T(Regs)) present; and hMSCs decreased secretion of IFN-gamma from the NK cells. Mechanistically, the hMSCs produced elevated prostaglandin E2 (PGE(2)) in co-cultures, and inhibitors of PGE(2) production mitigated hMSC-mediated immune modulation. These data offer insight into the interactions between allogeneic MSCs and immune cells and provide mechanisms likely involved with the in vivo MSC-mediated induction of tolerance that could be therapeutic for reduction of GVHD, rejection, and modulation of inflammation.},
- author = {Aggarwal, Sudeepta and Pittenger, Mark F.},
- doi = {10.1182/blood-2004-04-1559},
- file = {:Users/ryan/Documents/Mendeley Desktop/Aggarwal - 2005 - Human mesenchymal stem cells modulate allogeneic immune cell responses.pdf:pdf},
- issn = {0006-4971},
- journal = {Blood},
- month = feb,
- number = {4},
- pages = {1815--1822},
- title = {{Human mesenchymal stem cells modulate allogeneic immune cell responses}},
- url = {http://www.bloodjournal.org/cgi/doi/10.1182/blood-2004-04-1559},
- volume = {105},
- year = {2005}
- }
- @article{Rangaraju2015,
- author = {Rangaraju, Sunitha and Solis, Gregory M. and Andersson, Sofia I. and Gomez-Amaro, Rafael L. and Kardakaris, Rozina and Broaddus, Caroline D. and Niculescu, Alexander B. and Petrascheck, Michael},
- doi = {10.1111/acel.12379},
- file = {:Users/ryan/Documents/Mendeley Desktop/Rangaraju et al. - 2015 - Atypical antidepressants extend lifespan of iCaenorhabditis elegansi by activation of a non-cell-autonomous st.pdf:pdf},
- issn = {14749718},
- journal = {Aging Cell},
- keywords = {antidepressant},
- month = jun,
- title = {{Atypical antidepressants extend lifespan of \textit{Caenorhabditis elegans} by activation of a non-cell-autonomous stress response}},
- url = {http://doi.wiley.com/10.1111/acel.12379},
- volume = {1},
- year = {2015}
- }
- @article{Vijay2013,
- abstract = {Transcriptome Shotgun Sequencing (RNA-seq) has been readily embraced by geneticists and molecular ecologists alike. As with all high-throughput technologies, it is critical to understand which analytic strategies are best suited and which parameters may bias the interpretation of the data. Here we use a comprehensive simulation approach to explore how various features of the transcriptome (complexity, degree of polymorphism $\pi$, alternative splicing), technological processing (sequencing error $\epsilon$, library normalization) and bioinformatic workflow (de novo vs. mapping assembly, reference genome quality) impact transcriptome quality and inference of differential gene expression (DE). We find that transcriptome assembly and gene expression profiling (EdgeR vs. BaySeq software) works well even in the absence of a reference genome and is robust across a broad range of parameters. We advise against library normalization and in most situations advocate mapping assemblies to an annotated genome of a divergent sister clade, which generally outperformed de novo assembly (Trans-Abyss, Trinity, Soapdenovo-Trans). Transcriptome complexity (size, paralogs, alternative splicing isoforms) negatively affected the assembly and DE profiling, whereas the effects of sequencing error and polymorphism were almost negligible. Finally, we highlight the challenge of gene name assignment for de novo assemblies, the importance of mapping strategies and raise awareness of challenges associated with the quality of reference genomes. Overall, our results have significant practical and methodological implications and can provide guidance in the design and analysis of RNA-seq experiments, particularly for organisms where genomic background information is lacking.},
- author = {Vijay, Nagarjun and Poelstra, Jelmer W and K{\"{u}}nstner, Axel and Wolf, Jochen B W},
- doi = {10.1111/mec.12014},
- file = {:Users/ryan/Documents/Mendeley Desktop/Vijay et al. - 2013 - Challenges and strategies in transcriptome assembly and differential gene expression quantification. A comprehensi.pdf:pdf},
- issn = {1365-294X},
- journal = {Molecular Ecology},
- keywords = {bioinformatics,comparative genomics,differential gene expression,rna-seq,simulation,systems biology,transcriptome assembly},
- month = mar,
- number = {3},
- pages = {620--634},
- pmid = {22998089},
- title = {{Challenges and strategies in transcriptome assembly and differential gene expression quantification. A comprehensive in silico assessment of RNA-seq experiments}},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/22998089},
- volume = {22},
- year = {2013}
- }
- @article{Yan2011,
- abstract = {The nonhuman primates most commonly used in medical research are from the genus Macaca. To better understand the genetic differences between these animal models, we present high-quality draft genome sequences from two macaque species, the cynomolgus/crab-eating macaque and the Chinese rhesus macaque. Comparison with the previously sequenced Indian rhesus macaque reveals that all three macaques maintain abundant genetic heterogeneity, including millions of single-nucleotide substitutions and many insertions, deletions and gross chromosomal rearrangements. By assessing genetic regions with reduced variability, we identify genes in each macaque species that may have experienced positive selection. Genetic divergence patterns suggest that the cynomolgus macaque genome has been shaped by introgression after hybridization with the Chinese rhesus macaque. Macaque genes display a high degree of sequence similarity with human disease gene orthologs and drug targets. However, we identify several putatively dysfunctional genetic differences between the three macaque species, which may explain functional differences between them previously observed in clinical studies.},
- annote = {BGI/Merck cyno genome paper},
- author = {Yan, Guangmei and Zhang, Guojie and Fang, Xiaodong and Zhang, Yanfeng and Li, Cai and Ling, Fei and Cooper, David N and Li, Qiye and Li, Yan and van Gool, Alain J and Du, Hongli and Chen, Jiesi and Chen, Ronghua and Zhang, Pei and Huang, Zhiyong and Thompson, John R and Meng, Yuhuan and Bai, Yinqi and Wang, Jufang and Zhuo, Min and Wang, Tao and Huang, Ying and Wei, Liqiong and Li, Jianwen and Wang, Zhiwen and Hu, Haofu and Yang, Pengcheng and Le, Liang and Stenson, Peter D and Li, Bo and Liu, Xiaoming and Ball, Edward V and An, Na and Huang, Quanfei and Zhang, Yong and Fan, Wei and Zhang, Xiuqing and Li, Yingrui and Wang, Wen and Katze, Michael G and Su, Bing and Nielsen, Rasmus and Yang, Huanming and Wang, Jun and Wang, Xiaoning and Wang, Jian},
- doi = {10.1038/nbt.1992},
- file = {:Users/ryan/Documents/Mendeley Desktop/Yan et al. - 2011 - Genome sequencing and comparison of two nonhuman primate animal models, the cynomolgus and Chinese rhesus macaques.pdf:pdf;:Users/ryan/Documents/Mendeley Desktop/Yan et al. - 2011 - Genome sequencing and comparison of two nonhuman primate animal models, the cynomolgus and Chinese rhesus macaque(2).pdf:pdf},
- issn = {1546-1696},
- journal = {Nature Biotechnology},
- keywords = {Animal,Animals,Base Sequence,Chromosome Aberrations,DNA,Evolution,Genetic,Genome,Humans,Macaca fascicularis,Macaca fascicularis: genetics,Macaca mulatta,Macaca mulatta: genetics,Models,Molecular,Molecular Sequence Data,Polymorphism,Sequence Analysis,Species Specificity,cyno-genome,cyno-project},
- mendeley-tags = {cyno-genome,cyno-project},
- month = nov,
- number = {11},
- pages = {1019--1023},
- pmid = {22002653},
- title = {{Genome sequencing and comparison of two nonhuman primate animal models, the cynomolgus and Chinese rhesus macaques}},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/22002653},
- volume = {29},
- year = {2011}
- }
- @article{Siegel2014,
- author  = {Siegel, T Nicolai and Hon, Chung-Chau and Zhang, Qinfeng and Lopez-Rubio, Jose-Juan and Scheidig-Benatar, Christine and Martins, Rafael M and Sismeiro, Odile and Copp{\'{e}}e, Jean-Yves and Scherf, Artur},
- title   = {{Strand-specific RNA-Seq reveals widespread and developmentally regulated transcription of natural antisense transcripts in Plasmodium falciparum}},
- journal = {BMC Genomics},
- volume  = {15},
- number  = {1},
- pages   = {150},
- year    = {2014},
- doi     = {10.1186/1471-2164-15-150},
- issn    = {1471-2164},
- url     = {http://www.biomedcentral.com/1471-2164/15/150},
- file    = {:Users/ryan/Documents/Mendeley Desktop/Siegel et al. - 2014 - Strand-specific RNA-Seq reveals widespread and developmentally regulated transcription of natural antisense trans.pdf:pdf}
- }
- @article{Soneson2015,
- abstract = {High-throughput sequencing of cDNA (RNA-seq) is used extensively to characterize the transcriptome of cells. Many transcriptomic studies aim at comparing either abundance levels or the transcriptome composition between given conditions, and as a first step, the sequencing reads must be used as the basis for abundance quantification of transcriptomic features of interest, such as genes or transcripts. Various quantification approaches have been proposed, ranging from simple counting of reads that overlap given genomic regions to more complex estimation of underlying transcript abundances. In this paper, we show that gene-level abundance estimates and statistical inference offer advantages over transcript-level analyses, in terms of performance and interpretability. We also illustrate that the presence of differential isoform usage can lead to inflated false discovery rates in differential gene expression analyses on simple count matrices but that this can be addressed by incorporating offsets derived from transcript-level abundance estimates. We also show that the problem is relatively minor in several real data sets. Finally, we provide an R package ( tximport) to help users integrate transcript-level abundance estimates from common quantification pipelines into count-based statistical inference engines.},
- author = {Soneson, Charlotte and Love, Michael I. and Robinson, Mark D.},
- doi = {10.12688/f1000research.7563.2},
- file = {:Users/ryan/Documents/Mendeley Desktop/Soneson, Love, Robinson - 2015 - Differential analyses for RNA-seq transcript-level estimates improve gene-level inferences.pdf:pdf},
- issn = {2046-1402},
- journal = {F1000Research},
- keywords = {RNA-seq,gene expression,quantification,transcriptomics},
- pages = {1521},
- pmid = {26925227},
- title = {{Differential analyses for RNA-seq: transcript-level estimates improve gene-level inferences}},
- url = {http://f1000research.com/articles/4-1521/v2},
- volume = {4},
- year = {2015}
- }
- @misc{Wilson2013,
- title  = {{Macaca Fascicularis (cynomolgus macaque) Sequence Assembly}},
- author = {Wilson, Richard K and Warren, Wesley},
- year   = {2013},
- url    = {http://www.ncbi.nlm.nih.gov/assembly/GCF{\_}000364345.1}
- }
- @article{Head2014,
- abstract = {High-throughput sequencing, also known as next-generation sequencing (NGS), has revolutionized genomic research. In recent years, NGS technology has steadily improved, with costs dropping and the number and range of sequencing applications increasing exponentially. Here, we examine the critical role of sequencing library quality and consider important challenges when preparing NGS libraries from DNA and RNA sources. Factors such as the quantity and physical characteristics of the RNA or DNA source material as well as the desired application (i.e., genome sequencing, targeted sequencing, RNA-seq, ChIP-seq, RIP-seq, and methylation) are addressed in the context of preparing high quality sequencing libraries. In addition, the current methods for preparing NGS libraries from single cells are also discussed.},
- author = {Head, Steven R and Komori, H Kiyomi and Lamere, Sarah A and Whisenant, Thomas and {Van Nieuwerburgh}, Filip and Salomon, Daniel R and Ordoukhanian, Phillip},
- doi = {10.2144/000114133},
- file = {:Users/ryan/Documents/Mendeley Desktop/Head et al. - 2014 - Library construction for next-generation sequencing Overviews and challenges.pdf:pdf},
- issn = {1940-9818},
- journal = {BioTechniques},
- keywords = {chip-seq,deep sequencing,dna,dna-seq,library preparation,next-generation sequencing,rip-seq,rna,rna-seq},
- month = jan,
- number = {2},
- pages = {61--77},
- pmid = {24502796},
- title = {{Library construction for next-generation sequencing: Overviews and challenges}},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/24502796},
- volume = {56},
- year = {2014}
- }
- @article{Lawrence2013,
- author = {Lawrence, Michael and Huber, Wolfgang and Pag{\`{e}}s, Herv{\'{e}} and Aboyoun, Patrick and Carlson, Marc and Gentleman, Robert and Morgan, Martin T. and Carey, Vincent J.},
- doi = {10.1371/journal.pcbi.1003118},
- editor = {Prlic, Andreas},
- file = {:Users/ryan/Documents/Mendeley Desktop/Lawrence et al. - 2013 - Software for Computing and Annotating Genomic Ranges.pdf:pdf},
- issn = {1553-7358},
- journal = {PLoS Computational Biology},
- month = aug,
- number = {8},
- pages = {e1003118},
- title = {{Software for Computing and Annotating Genomic Ranges}},
- url = {http://dx.plos.org/10.1371/journal.pcbi.1003118},
- volume = {9},
- year = {2013}
- }
- @article{Amemiya2019,
- abstract = {Functional genomics assays based on high-throughput sequencing greatly expand our ability to understand the genome. Here, we define the ENCODE blacklist- a comprehensive set of regions in the human, mouse, worm, and fly genomes that have anomalous, unstructured, or high signal in next-generation sequencing experiments independent of cell line or experiment. The removal of the ENCODE blacklist is an essential quality measure when analyzing functional genomics data.},
- author = {Amemiya, Haley M. and Kundaje, Anshul and Boyle, Alan P.},
- doi = {10.1038/s41598-019-45839-z},
- file = {:Users/ryan/Documents/Mendeley Desktop/Amemiya, Kundaje, Boyle - 2019 - The ENCODE Blacklist Identification of Problematic Regions of the Genome.pdf:pdf},
- issn = {2045-2322},
- journal = {Scientific Reports},
- number = {1},
- pages = {1--5},
- title = {{The ENCODE Blacklist: Identification of Problematic Regions of the Genome}},
- url = {https://www.nature.com/articles/s41598-019-45839-z.pdf},
- volume = {9},
- year = {2019}
- }
- @article{Cui2003,
- abstract = {Extracting biological information from microarray data requires appropriate statistical methods. The simplest statistical method for detecting differential expression is the t test, which can be used to compare two conditions when there is replication of samples. With more than two conditions, analysis of variance (ANOVA) can be used, and the mixed ANOVA model is a general and powerful approach for microarray experiments with multiple factors and/or several sources of variation.},
- author = {Cui, Xiangqin and Churchill, Gary A},
- file = {:Users/ryan/Documents/Mendeley Desktop/Cui, Churchill - 2003 - Statistical tests for differential expression in cDNA microarray experiments.pdf:pdf},
- issn = {1465-6914},
- journal = {Genome Biology},
- keywords = {Analysis of Variance,Gene Expression Profiling,Gene Expression Profiling: statistics {\&} numerical data,Matched-Pair Analysis,Oligonucleotide Array Sequence Analysis,Statistics as Topic},
- month = jan,
- number = {4},
- pages = {210},
- pmid = {12702200},
- title = {{Statistical tests for differential expression in cDNA microarray experiments}},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=154570{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {4},
- year = {2003}
- }
- @article{Liao2014,
- abstract = {Motivation: Next-generation sequencing technologies generate millions of short sequence reads, which are usually aligned to a reference genome. In many applications, the key information required for downstream analysis is the number of reads mapping to each genomic feature, for example to each exon or each gene. The process of counting reads is called read summarization. Read summarization is required for a great variety of genomic analyses but has so far received relatively little attention in the literature. Results: We present featureCounts, a read summarization program suitable for counting reads generated from either RNA or genomic DNA sequencing experiments. featureCounts implements highly efficient chromosome hashing and feature blocking techniques. It is considerably faster than existing methods (by an order of magnitude for gene-level summarization) and requires far less computer memory. It works with either single or paired-end reads and provides a wide range of options appropriate for different sequencing applications. Availability and implementation: featureCounts is available under GNU General Public License as part of the Subread (http://subread.sourceforge.net) or Rsubread (http://www.bioconductor.org) software packages. Contact: shi@wehi.edu.au},
- archivePrefix = {arXiv},
- arxivId = {1305.3347v2},
- author = {Liao, Yang and Smyth, Gordon K. and Shi, Wei},
- doi = {10.1093/bioinformatics/btt656},
- eprint = {1305.3347v2},
- file = {:Users/ryan/Documents/Mendeley Desktop/Liao, Smyth, Shi - 2014 - FeatureCounts An efficient general purpose program for assigning sequence reads to genomic features.pdf:pdf},
- issn = {1460-2059},
- journal = {Bioinformatics},
- number = {7},
- pages = {923--930},
- pmid = {24227677},
- title = {{FeatureCounts: An efficient general purpose program for assigning sequence reads to genomic features}},
- url = {http://bioinformatics.oxfordjournals.org/content/30/7/923.full.pdf},
- volume = {30},
- year = {2014}
- }
- @article{Cline2005,
- abstract = {MOTIVATION: Many or most mammalian genes undergo alternative splicing, generating a variety of transcripts from a single gene. New information on splice variation is becoming available through technology for measuring expression levels of several exons or splice junctions per gene. We have developed a statistical method, ANalysis Of Splice VAriation (ANOSVA) to detect alternative splicing from expression data. Since ANOSVA requires no transcript information, it can be applied when the level of annotation is poor. When validated against spiked clone data, it generated no false positives and few false negatives. We demonstrated ANOSVA with data from a prototype mouse alternative splicing array, run against normal adult tissues, yielding a set of genes with evidence of tissue-specific splice variation.
- AVAILABILITY: The results are available at the supplementary information site.
- SUPPLEMENTARY INFORMATION: The results are available at the supplementary information site https://bioinfo.affymetrix.com/Papers/ANOSVA/},
- author = {Cline, Melissa S and Blume, John and Cawley, Simon and Clark, Tyson A and Hu, Jing-Shan and Lu, Gang and Salomonis, Nathan and Wang, Hui and Williams, Alan},
- doi = {10.1093/bioinformatics/bti1010},
- file = {:Users/ryan/Documents/Mendeley Desktop/Cline et al. - 2005 - ANOSVA a statistical method for detecting splice variation from expression data.pdf:pdf},
- issn = {1367-4803},
- journal = {Bioinformatics},
- keywords = {Alternative Splicing,Animals,Computational Biology,Computational Biology: methods,Databases, Protein,False Positive Reactions,Gene Expression Profiling,Mice,Models, Statistical,Oligonucleotide Array Sequence Analysis,Reproducibility of Results,Software},
- month = jun,
- number = {Suppl 1},
- pages = {i107--i115},
- pmid = {15961447},
- title = {{ANOSVA: a statistical method for detecting splice variation from expression data}},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/15961447},
- volume = {21},
- year = {2005}
- }
- @incollection{Peng2012b,
- abstract = {Combinatorial binding of transcription factors (TFs) and cofactors to specific regulatory regions of target genes in vivo is an important mechanism of transcriptional regulation. Chromatin immunoprecipitation (ChIP) is a powerful technique to detect protein binding to specific regions of target genes in vivo. However, conventional ChIP analysis for individual factors (single ChIP) does not provide information on co-occupancy of two interacting TFs on target genes, even if both bind to the same chromatin regions. Double ChIP analysis involves sequential (double) immunoprecipitation of two chromatin-binding proteins and can be used to study co-occupancy of two or more factors on specific regions of the same DNA allele. Furthermore, by including a cell type-specific protein in double-ChIP, target co-occupancy in a specific cell type can be studied even if the other partner is more widely expressed. In this chapter, we describe a detailed protocol for double ChIP analysis in mouse retinas. Using the rod-specific transcription factor NR2E3 and the cone/rod homeobox protein CRX as examples, we show that NR2E3 and CRX are co-enriched on the promoter of active Rho and Rbp3 genes in rods, but are present to a much lesser degree on the promoters of silent cone opsin genes. These results suggest a new mechanism by which rod and cone genes are differentially regulated by these transcription factors in rod photoreceptors.},
- author = {Peng, Guang-Hua and Chen, Shiming},
- booktitle = {Retinal Degeneration: Methods and Protocols},
- doi = {10.1007/978-1-62703-080-9_22},
- file = {:Users/ryan/Documents/Mendeley Desktop/Peng, Chen - 2012 - Double Chromatin Immunoprecipitation Analysis of Target Co-occupancy of Retinal Transcription Factors.pdf:pdf},
- pages = {311--328},
- publisher = {Humana Press},
- series = {Methods in Molecular Biology},
- title = {Double Chromatin Immunoprecipitation: Analysis of Target Co-occupancy of Retinal Transcription Factors},
- url = {http://link.springer.com/10.1007/978-1-62703-080-9_22},
- volume = {935},
- year = {2012}
- }
- @misc{Unknown801F42Df,
- abstract = {s41564-019-0480-z.pdf},
- file = {:Users/ryan/Documents/Mendeley Desktop/Unknown - Unknown - 801F42Df5010665E1392612B2B80D7607D43C650.Pdf.pdf:pdf},
- internal-note = {Placeholder import: the title is a file hash, and the abstract and url fields name two different Nature-family article identifiers. Author, title and year still need to be filled in from the actual PDF.},
- title = {{801F42Df5010665E1392612B2B80D7607D43C650.Pdf}},
- url = {https://www.nature.com/articles/s41571-019-0187-3.pdf}
- }
- @article{Hoffman2009,
- abstract = {The transcriptional networks underlying mammalian cell development and function are largely unknown. The recently described use of flow cell sequencing devices in combination with chromatin immunoprecipitation (ChIP-seq) stands to revolutionize the identification of DNA-protein interactions. As such, ChIP-seq is rapidly becoming the method of choice for the genome-wide localization of histone modifications and transcription factor binding sites. As further studies are performed, the information generated by ChIP-seq is expected to allow the development of a framework for networks describing the transcriptional regulation of cellular development and function. However, to date, this technology has been applied only to a small number of cell types, and even fewer tissues, suggesting a huge potential for novel discovery in this field.},
- author = {Hoffman, Brad G and Jones, Steven J M},
- doi = {10.1677/JOE-08-0526},
- file = {:Users/ryan/Documents/Mendeley Desktop/Hoffman, Jones - 2009 - Genome-wide identification of DNA-protein interactions using chromatin immunoprecipitation coupled with flow cel.pdf:pdf},
- issn = {1479-6805},
- journal = {Journal of Endocrinology},
- keywords = {Animals,Binding Sites,Binding Sites: genetics,Chromatin Immunoprecipitation,Chromatin Immunoprecipitation: methods,Chromosome Mapping,Chromosome Mapping: methods,DNA-Binding Proteins,DNA-Binding Proteins: metabolism,Humans,Models, Biological,Protein Binding,Sequence Analysis, DNA,Sequence Analysis, DNA: methods},
- month = apr,
- number = {1},
- pages = {1--13},
- pmid = {19136617},
- title = {Genome-wide identification of {DNA}-protein interactions using chromatin immunoprecipitation coupled with flow cell sequencing},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/19136617},
- volume = {201},
- year = {2009}
- }
- @misc{Gerard2017a,
- abstract = {Unwanted variation, including hidden confounding, is a well-known problem in many fields, particularly large-scale gene expression studies. Recent proposals to use control genes --- genes assumed to be unassociated with the covariates of interest --- have led to new methods to deal with this problem. Going by the moniker Removing Unwanted Variation (RUV), there are many versions --- RUV1, RUV2, RUV4, RUVinv, RUVrinv, RUVfun. In this paper, we introduce a general framework, RUV*, that both unites and generalizes these approaches. This unifying framework helps clarify connections between existing methods. In particular we provide conditions under which RUV2 and RUV4 are equivalent. The RUV* framework also preserves an advantage of RUV approaches --- their modularity --- which facilitates the development of novel methods based on existing matrix imputation algorithms. We illustrate this by implementing RUVB, a version of RUV* based on Bayesian factor analysis. In realistic simulations based on real data we found that RUVB is competitive with existing methods in terms of both power and calibration, although we also highlight the challenges of providing consistently reliable calibration among data sets.},
- archivePrefix = {arXiv},
- arxivId = {1705.08393},
- author = {Gerard, David and Stephens, Matthew},
- eprint = {1705.08393},
- file = {:Users/ryan/Documents/Mendeley Desktop/Gerard, Stephens - 2017 - Unifying and Generalizing Methods for Removing Unwanted Variation Based on Negative Controls.pdf:pdf},
- keywords = {batch effects,correlated tests,gene expression,hidden confounding,negative controls,RNA-seq,unobserved confounding,unwanted variation},
- title = {Unifying and Generalizing Methods for Removing Unwanted Variation Based on Negative Controls},
- url = {http://arxiv.org/abs/1705.08393},
- year = {2017}
- }
- @book{Gentleman2005,
- address = {New York, NY},
- doi = {10.1007/0-387-29362-0},
- editor = {Gentleman, Robert and Carey, Vincent J. and Huber, Wolfgang and Irizarry, Rafael A. and Dudoit, Sandrine},
- isbn = {978-0-387-25146-2},
- publisher = {Springer New York},
- series = {Statistics for Biology and Health},
- title = {Bioinformatics and Computational Biology Solutions Using {R} and {Bioconductor}},
- url = {http://link.springer.com/10.1007/0-387-29362-0},
- year = {2005}
- }
- @article{Pawlikowska2014,
- abstract = {Several outlier and subgroup identification statistics (OASIS) have been proposed to discover transcriptomic features with outliers or multiple modes in expression that are indicative of distinct biological processes or subgroups. Here, we borrow ideas from the OASIS methods in the bioinformatics and statistics literature to develop the most informative spacing test (MIST) for unsupervised detection of such transcriptomic features. In an example application involving 14 cases of pediatric acute megakaryoblastic leukemia, MIST more robustly identified features that perfectly discriminate subjects according to gender or the presence of a prognostically relevant fusion-gene than did seven other OASIS methods in the analysis of RNA-seq exon expression, RNA-seq exon junction expression, and microarray exon expression data. MIST was also effective at identifying features related to gender or molecular subtype in an example application involving 157 adult cases of acute myeloid leukemia. MIST will be freely available in the OASIS R package at http://www.stjuderesearch.org/site/depts/biostats.},
- author = {Pawlikowska, Iwona and Wu, Gang and Edmonson, Michael and Liu, Zhifa and Gruber, Tanja and Zhang, Jinghui and Pounds, Stan},
- doi = {10.1093/bioinformatics/btu039},
- file = {:Users/ryan/Documents/Mendeley Desktop/Pawlikowska et al. - 2014 - The Most Informative Spacing Test Effectively Discovers Biologically Relevant Outliers or Multiple Modes in.pdf:pdf},
- issn = {1367-4811},
- journal = {Bioinformatics},
- month = jan,
- pages = {1--9},
- pmid = {24458951},
- title = {The Most Informative Spacing Test Effectively Discovers Biologically Relevant Outliers or Multiple Modes in Expression},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/24458951},
- year = {2014}
- }
- @article{Shin2014,
- abstract = {Background: The molecular profile of circulating blood can reflect physiological and pathological events occurring in other tissues and organs of the body and delivers a comprehensive view of the status of the immune system. Blood has been useful in studying the pathobiology of many diseases. It is accessible and easily collected making it ideally suited to the development of diagnostic biomarker tests. The blood transcriptome has a high complement of globin RNA that could potentially saturate next-generation sequencing platforms, masking lower abundance transcripts. Methods to deplete globin mRNA are available, but their effect has not been comprehensively studied in peripheral whole blood RNA-Seq data. In this study we aimed to assess technical variability associated with globin depletion in addition to assessing general technical variability in RNA-Seq from whole blood derived samples. Results: We compared technical and biological replicates having undergone globin depletion or not and found that the experimental globin depletion protocol employed removed approximately 80{\%} of globin transcripts, improved the correlation of technical replicates, allowed for reliable detection of thousands of additional transcripts and generally increased transcript abundance measures. Differential expression analysis revealed thousands of genes significantly upregulated as a result of globin depletion. In addition, globin depletion resulted in the down-regulation of genes involved in both iron and zinc metal ion bonding. Conclusions: Globin depletion appears to meaningfully improve the quality of peripheral whole blood RNA-Seq data, and may improve our ability to detect true biological variation. Some concerns remain, however. Key amongst them the significant reduction in RNA yields following globin depletion. More generally, our investigation of technical and biological variation with and without globin depletion finds that high-throughput sequencing by RNA-Seq is highly reproducible within a large dynamic range of detection and provides an accurate estimation of RNA concentration in peripheral whole blood. High-throughput sequencing is thus a promising technology for whole blood transcriptomics and biomarker discovery. {\copyright} 2014 Shin et al.},
- author = {Shin, Heesun and Shannon, Casey P. and Fishbane, Nick and Ruan, Jian and Zhou, Mi and Balshaw, Robert and Wilson-McManus, Janet E. and Ng, Raymond T. and McManus, Bruce M. and Tebbutt, Scott J.},
- doi = {10.1371/journal.pone.0091041},
- file = {:Users/ryan/Documents/Mendeley Desktop/Shin et al. - 2014 - Variation in RNA-Seq transcriptome profiles of peripheral whole blood from healthy individuals with and without glo.pdf:pdf},
- issn = {1932-6203},
- journal = {PLoS ONE},
- number = {3},
- pages = {e91041},
- title = {Variation in {RNA-Seq} transcriptome profiles of peripheral whole blood from healthy individuals with and without globin depletion},
- url = {https://journals.plos.org/plosone/article/file?id=10.1371/journal.pone.0091041&type=printable},
- volume = {9},
- year = {2014}
- }
- @article{Tchourine2017,
- author = {Tchourine, Konstantine and Vogel, Christine and Bonneau, Richard},
- doi = {10.1101/104885},
- file = {:Users/ryan/Documents/Mendeley Desktop/Tchourine et al. - 2017 - Explicit Modeling of RNA Stability Improves Large-Scale Inference of Transcription Regulation.pdf:pdf},
- journal = {bioRxiv},
- pages = {1--35},
- title = {Explicit Modeling of {RNA} Stability Improves Large-Scale Inference of Transcription Regulation},
- url = {http://biorxiv.org/content/early/2017/01/31/104885.abstract},
- year = {2017}
- }
- @article{Hoffman2016,
- abstract = {Gene expression datasets are complicated and have multiple sources of biological and technical variation. These datasets have recently become more complex as it is now feasible to assay gene expression from the same individual in multiple tissues or at multiple time points. The variancePartition package implements a statistical method to quantify the contribution of multiple sources of variation and decouple within/between-individual variation. In addition, variancePartition produces results at the gene-level to identity genes that follow or deviate from the genome-wide trend.},
- author = {Hoffman, Gabriel},
- doi = {10.1186/s12859-016-1323-z},
- file = {:Users/ryan/Documents/Mendeley Desktop/Hoffman - 2016 - variancePartition Quantifying and interpreting drivers of variation in multilevel gene expression experiments.pdf:pdf},
- issn = {1471-2105},
- journal = {BMC Bioinformatics},
- keywords = {linear mixed model,RNA-seq,transcriptome profiling},
- pages = {10--12},
- publisher = {BioMed Central},
- title = {{variancePartition}: Quantifying and interpreting drivers of variation in multilevel gene expression experiments},
- url = {http://dx.doi.org/10.1186/s12859-016-1323-z},
- year = {2016}
- }
- @article{Hachiya2009,
- author = {Hachiya, Tsuyoshi and Sakakibara, Yasubumi},
- file = {:Users/ryan/Documents/Mendeley Desktop/Hachiya, Sakakibara - 2009 - detection of conserved gene clusters unravels the evolutionary forces behind the correlation between protei.pdf:pdf},
- journal = {Genes, Genomes and Genomics},
- keywords = {comparative genomics,conserved gene cluster,genome organization,ortholog,orthologous gene,substitution rate},
- number = {SI1},
- pages = {31--45},
- title = {Sensitive detection of conserved gene clusters unravels the evolutionary forces behind the correlation between protein sequence homology and gene order conservation},
- url = {http://203.183.32.151/JournalsSup/images/0906/GGG_3(SI1)31-45o.pdf},
- volume = {3},
- year = {2009}
- }
- @article{Triff2017,
- abstract = {During colon cancer, epigenetic alterations contribute to the dysregulation of major cellular functions and signaling pathways. Modifications in chromatin signatures such as H3K4me3 and H3K9ac, which are associated with transcriptionally active genes, can lead to genomic instability and perturb the expression of gene sets associated with oncogenic processes. In order to further elucidate early pre-tumorigenic epigenetic molecular events driving CRC, we integrated diverse, genome-wide, epigenetic inputs (by high throughput sequencing of RNA, H3K4me3, and H3K9ac) and compared differentially expressed transcripts (DE) and enriched regions (DER) in an in-vivo rat colon cancer progression model. Carcinogen (AOM) effects were detected genome-wide at the RNA (116 DE genes), K9ac (49 DERs including 24 genes) and K4me3 (7678 DERs including 3792 genes) level. RNA-seq differential expression and pathway analysis indicated that interferon-associated innate immune responses were impacted by AOM exposure. Despite extensive associations between K4me3 DERs and colon tumorigenesis (1210 genes were linked to colorectal carcinoma) including FOXO3, GNAI2, H2AFX, MSH2, NR3C1, PDCD4 and VEGFA, these changes were not reflected at the RNA gene expression level during early cancer progression. Collectively, our results indicate that carcinogen-induced changes in gene K4me3 DERs are harbingers of future transcriptional events, which drive malignant transformation of the colon.},
- author = {Triff, Karen and McLean, Mathew W. and Konganti, Kranti and Pang, Jiahui and Callaway, Evelyn and Zhou, Beiyan and Ivanov, Ivan and Chapkin, Robert S.},
- doi = {10.1016/j.bbadis.2017.03.009},
- file = {:Users/ryan/Documents/Mendeley Desktop/Triff et al. - 2017 - Assessment of histone tail modifications and transcriptional profiling during colon cancer progression reveals a g.pdf:pdf},
- issn = {1879-260X},
- journal = {Biochimica et Biophysica Acta - Molecular Basis of Disease},
- number = {6},
- pages = {1392--1402},
- title = {Assessment of histone tail modifications and transcriptional profiling during colon cancer progression reveals a global decrease in {H3K4me3} activity},
- volume = {1863},
- year = {2017}
- }
- @article{Zhu2007,
- abstract = {Taking advantage of the complete genome sequences of several mammals, we developed a novel method to detect losses of well-established genes in the human genome through syntenic mapping of gene structures between the human, mouse, and dog genomes. Unlike most previous genomic methods for pseudogene identification, this analysis is able to differentiate losses of well-established genes from pseudogenes formed shortly after segmental duplication or generated via retrotransposition. Therefore, it enables us to find genes that were inactivated long after their birth, which were likely to have evolved nonredundant biological functions before being inactivated. The method was used to look for gene losses along the human lineage during the approximately 75 million years (My) since the common ancestor of primates and rodents (the euarchontoglire crown group). We identified 26 losses of well-established genes in the human genome that were all lost at least 50 My after their birth. Many of them were previously characterized pseudogenes in the human genome, such as GULO and UOX. Our methodology is highly effective at identifying losses of single-copy genes of ancient origin, allowing us to find a few well-known pseudogenes in the human genome missed by previous high-throughput genome-wide studies. In addition to confirming previously known gene losses, we identified 16 previously uncharacterized human pseudogenes that are definitive losses of long-established genes. Among them is ACYL3, an ancient enzyme present in archaea, bacteria, and eukaryotes, but lost approximately 6 to 8 Mya in the ancestor of humans and chimps. Although losses of well-established genes do not equate to adaptive gene losses, they are a useful proxy to use when searching for such genetic changes. This is especially true for adaptive losses that occurred more than 250,000 years ago, since any genetic evidence of the selective sweep indicative of such an event has been erased.},
- author = {Zhu, Jingchun and Sanborn, J Zachary and Diekhans, Mark and Lowe, Craig B and Pringle, Tom H and Haussler, David},
- doi = {10.1371/journal.pcbi.0030247},
- file = {:Users/ryan/Documents/Mendeley Desktop/Zhu et al. - 2007 - Comparative genomics search for losses of long-established genes on the human lineage.pdf:pdf},
- issn = {1553-7358},
- journal = {PLoS Computational Biology},
- keywords = {Animals,Biological Evolution,Chromosome Mapping,Chromosome Mapping: methods,DNA Mutational Analysis,DNA Mutational Analysis: methods,Dogs,Evolution, Molecular,Gene Deletion,Genetic Variation,Genetic Variation: genetics,Genome, Human,Genome, Human: genetics,Genomics,Genomics: methods,Humans,Mice,Pseudogenes,Pseudogenes: genetics},
- month = dec,
- number = {12},
- pages = {e247},
- pmid = {18085818},
- title = {Comparative genomics search for losses of long-established genes on the human lineage},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=2134963&tool=pmcentrez&rendertype=abstract},
- volume = {3},
- year = {2007}
- }
- @article{Steijger2013,
- abstract = {We evaluated 25 protocol variants of 14 independent computational methods for exon identification, transcript reconstruction and expression-level quantification from RNA-seq data. Our results show that most algorithms are able to identify discrete transcript components with high success rates but that assembly of complete isoform structures poses a major challenge even when all constituent elements are identified. Expression-level estimates also varied widely across methods, even when based on similar transcript models. Consequently, the complexity of higher eukaryotic genomes imposes severe limitations on transcript recall and splice product discrimination that are likely to remain limiting factors for the analysis of current-generation RNA-seq data.},
- author = {Steijger, Tamara and Abril, Josep F and Engstr{\"{o}}m, P{\"{a}}r G and Kokocinski, Felix and Akerman, Martin and Alioto, Tyler and Ambrosini, Giovanna and Antonarakis, Stylianos E and Behr, Jonas and Bertone, Paul and Bohnert, Regina and Bucher, Philipp and Cloonan, Nicole and Derrien, Thomas and Djebali, Sarah and Du, Jiang and Dudoit, Sandrine and Gerstein, Mark and Gingeras, Thomas R and Gonzalez, David and Grimmond, Sean M and Guig{\'{o}}, Roderic and Habegger, Lukas and Harrow, Jennifer and Hubbard, Tim J and Iseli, Christian and Jean, G{\'{e}}raldine and Kahles, Andr{\'{e}} and Lagarde, Julien and Leng, Jing and Lefebvre, Gregory and Lewis, Suzanna and Mortazavi, Ali and Niermann, Peter and R{\"{a}}tsch, Gunnar and Reymond, Alexandre and Ribeca, Paolo and Richard, Hugues and Rougemont, Jacques and Rozowsky, Joel and Sammeth, Michael and Sboner, Andrea and Schulz, Marcel H and Searle, Steven M J and Solorzano, Naryttza Diaz and Solovyev, Victor and Stanke, Mario and Stevenson, Brian J and Stockinger, Heinz and Valsesia, Armand and Weese, David and White, Simon and Wold, Barbara J and Wu, Jie and Wu, Thomas D and Zeller, Georg and Zerbino, Daniel and Zhang, Michael Q},
- doi = {10.1038/nmeth.2714},
- file = {:Users/ryan/Documents/Mendeley Desktop/Steijger et al. - 2013 - Assessment of transcript reconstruction methods for RNA-seq.pdf:pdf},
- issn = {1548-7105},
- journal = {Nature Methods},
- month = nov,
- number = {12},
- pages = {1177--1184},
- pmid = {24185837},
- title = {Assessment of transcript reconstruction methods for {RNA-seq}},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/24185837},
- volume = {10},
- year = {2013}
- }
- @article{Berest2018,
- abstract = {Transcription factor (TF) activity constitutes an important readout of cellular signalling pathways and thus for assessing regulatory differences across conditions. However, current technologies lack the ability to simultaneously assess activity changes for multiple TFs and in particular to determine whether a specific TF acts as repressor or activator. To this end, we introduce a widely applicable genome-wide method diffTF to assess differential TF binding activity and classifying TFs as activator or repressor by integrating any type of genome-wide chromatin with RNA-Seq data and in-silico predicted TF binding sites (available at https://git.embl.de/grp-zaugg/diffTF). We apply diffTF to a large ATAC-Seq dataset of mutated and unmutated chronic lymphocytic leukemia and identify dozens of TFs that are differentially active. Around 40{\%} of them have a previously described association with CLL while {\textasciitilde}60{\%} constitute potentially novel TFs driving the different CLL subtypes. Finally, we validated the method experimentally using the well studied system of hematopoietic differentiation in mouse.},
- author = {Berest, Ivan and Arnold, Christian and Reyes-Palomares, Armando and Palla, Giovanni and Rasmussen, Kasper Dindler and Helin, Kristian and Zaugg, Judith},
- doi = {10.1101/368498},
- file = {:Users/ryan/Documents/Mendeley Desktop/Berest et al. - 2018 - Quantification of differential transcription factor activity and multiomic-based classification into activators a.pdf:pdf},
- journal = {bioRxiv},
- pages = {368498},
- title = {Quantification of differential transcription factor activity and multiomic-based classification into activators and repressors: {diffTF}},
- url = {https://www.biorxiv.org/content/early/2018/07/13/368498},
- year = {2018}
- }
- @article{Zhu2010,
- abstract = {Chromatin immunoprecipitation (ChIP) followed by high-throughput sequencing (ChIP-seq) or ChIP followed by genome tiling array analysis (ChIP-chip) have become standard technologies for genome-wide identification of DNA-binding protein target sites. A number of algorithms have been developed in parallel that allow identification of binding sites from ChIP-seq or ChIP-chip datasets and subsequent visualization in the University of California Santa Cruz (UCSC) Genome Browser as custom annotation tracks. However, summarizing these tracks can be a daunting task, particularly if there are a large number of binding sites or the binding sites are distributed widely across the genome.},
- author = {Zhu, Lihua Julie and Gazin, Claude and Lawson, Nathan D and Pag{\`{e}}s, Herv{\'{e}} and Lin, Simon M and Lapointe, David S and Green, Michael R},
- doi = {10.1186/1471-2105-11-237},
- file = {:Users/ryan/Documents/Mendeley Desktop/Zhu et al. - 2010 - ChIPpeakAnno a Bioconductor package to annotate ChIP-seq and ChIP-chip data.pdf:pdf},
- issn = {1471-2105},
- journal = {BMC Bioinformatics},
- keywords = {Binding Sites,Chromatin Immunoprecipitation,Chromatin Immunoprecipitation: methods,Genome,Oligonucleotide Array Sequence Analysis,Oligonucleotide Array Sequence Analysis: methods,Software},
- month = jan,
- pages = {237},
- pmid = {20459804},
- title = {{ChIPpeakAnno}: a {Bioconductor} package to annotate {ChIP-seq} and {ChIP-chip} data},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=3098059&tool=pmcentrez&rendertype=abstract},
- volume = {11},
- year = {2010}
- }
- @article{Subramanian2005,
- author = {Subramanian, Aravind and Tamayo, Pablo and Mootha, Vamsi K and Mukherjee, Sayan and Ebert, Benjamin L and Gillette, Michael A and Paulovich, Amanda and Pomeroy, Scott L and Golub, Todd R and Lander, Eric S and Mesirov, Jill P},
- doi = {10.1073/pnas.0506580102},
- file = {:Users/ryan/Documents/Mendeley Desktop/Subramanian et al. - 2005 - Gene set enrichment analysis A knowledge-based approach for interpreting genome-wide.pdf:pdf},
- journal = {Proceedings of the National Academy of Sciences},
- number = {43},
- pages = {15545--15550},
- title = {Gene set enrichment analysis: A knowledge-based approach for interpreting genome-wide expression profiles},
- volume = {102},
- year = {2005}
- }
- @article{Esnaola2013,
- author = {Esnaola, Mikel and Puig, Pedro and Gonzalez, David and Castelo, Robert and Gonzalez, Juan R},
- doi = {10.1186/1471-2105-14-254},
- file = {:Users/ryan/Documents/Mendeley Desktop/Esnaola et al. - 2013 - A flexible count data model to fit the wide diversity of expression profiles arising from extensively replicated.pdf:pdf},
- issn = {1471-2105},
- journal = {BMC Bioinformatics},
- number = {1},
- pages = {254},
- title = {A flexible count data model to fit the wide diversity of expression profiles arising from extensively replicated {RNA-seq} experiments},
- url = {http://www.biomedcentral.com/1471-2105/14/254},
- volume = {14},
- year = {2013}
- }
- @article{VanIterson2016,
- author = {van Iterson, Maarten M. and van Zwet, Erik W. and Slagboom, P. Eline and Heijmans, Bastiaan T.},
- doi = {10.1101/055772},
- file = {:Users/ryan/Documents/Mendeley Desktop/van Iterson et al. - 2016 - Controlling bias and inflation in epigenome- and transcriptome-wide association studies using the empirical.pdf:pdf},
- journal = {bioRxiv},
- month = may,
- pages = {055772},
- title = {Controlling bias and inflation in epigenome- and transcriptome-wide association studies using the empirical null distribution},
- url = {http://biorxiv.org/lookup/doi/10.1101/055772},
- year = {2016}
- }
- @misc{GenomeReferenceConsortium,
- author = {{Genome Reference Consortium}},
- title = {{Genome Reference Consortium Human Build 37 (GRCh37)}},
- url = {http://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.13/}
- }
- @article{Mostafavi2013,
- abstract = {Transcriptomic assays that measure expression levels are widely used to study the manifestation of environmental or genetic variations in cellular processes. RNA-sequencing in particular has the potential to considerably improve such understanding because of its capacity to assay the entire transcriptome, including novel transcriptional events. However, as with earlier expression assays, analysis of RNA-sequencing data requires carefully accounting for factors that may introduce systematic, confounding variability in the expression measurements, resulting in spurious correlations. Here, we consider the problem of modeling and removing the effects of known and hidden confounding factors from RNA-sequencing data. We describe a unified residual framework that encapsulates existing approaches, and using this framework, present a novel method, HCP (Hidden Covariates with Prior). HCP uses a more informed assumption about the confounding factors, and performs as well or better than existing approaches while having a much lower computational cost. Our experiments demonstrate that accounting for known and hidden factors with appropriate models improves the quality of RNA-sequencing data in two very different tasks: detecting genetic variations that are associated with nearby expression variations (cis-eQTLs), and constructing accurate co-expression networks.},
- author = {Mostafavi, Sara and Battle, Alexis and Zhu, Xiaowei and Urban, Alexander E. and Levinson, Douglas and Montgomery, Stephen B. and Koller, Daphne},
- doi = {10.1371/journal.pone.0068141},
- editor = {Benos, Panayiotis V.},
- file = {:Users/ryan/Documents/Mendeley Desktop/Mostafavi et al. - 2013 - Normalizing RNA-Sequencing Data by Modeling Hidden Covariates with Prior Knowledge.pdf:pdf},
- issn = {1932-6203},
- journal = {PLoS ONE},
- month = jul,
- number = {7},
- pages = {e68141},
- pmid = {23874524},
- title = {Normalizing {RNA}-Sequencing Data by Modeling Hidden Covariates with Prior Knowledge},
- url = {http://dx.plos.org/10.1371/journal.pone.0068141},
- volume = {8},
- year = {2013}
- }
- @article{Nellore2015,
- abstract = {RNA sequencing (RNA-seq) experiments now span hundreds to thousands of samples. A source of frustration for investigators analyzing a given dataset is the inability to rapidly and reproducibly align its samples jointly. Current spliced alignment software is designed to analyze each sample separately. Consequently, no information is gained from analyzing multiple samples together, and it is difficult to reproduce the exact analysis without access to original computing resources. We describe Rail-RNA, a cloud-enabled spliced aligner that analyzes many samples at once. Rail-RNA eliminates redundant work across samples, making it more efficient as samples are added. For many samples, Rail-RNA is more accurate than annotation-assisted aligners. We use Rail-RNA to align 666 RNA-seq samples from the GEUVADIS project on Amazon Web Services in 12 hours for US{\$}0.69 per sample. Rail-RNA produces alignments and base-resolution bigWig coverage files, ready for use with downstream packages for reproducible statistical analysis. We identify 290,416 expressed regions in the GEUVADIS samples, including 21,224 that map to intergenic sequence. We show that these regions show consistent patterns of variation across populations and with respect to known technological confounders. We identify expressed regions in the GEUVADIS samples and show that both annotated and unannotated (novel) expressed regions exhibit consistent patterns of variation across populations and with respect to known confounders. Rail-RNA is open-source software available at http://rail.bio .},
- author = {Nellore, Abhinav and Collado-Torres, Leonardo and Jaffe, Andrew E and Morton, James and Pritt, Jacob and Alquicira-Hern{\'{a}}ndez, Jos{\'{e}} and Leek, Jeffrey T and Langmead, Ben},
- doi = {10.1101/019067},
- file = {:Users/ryan/Documents/Mendeley Desktop/Nellore et al. - 2015 - Rail-RNA Scalable analysis of RNA-seq splicing and coverage.pdf:pdf},
- journal = {bioRxiv},
- pages = {019067},
- title = {{Rail-RNA}: Scalable analysis of {RNA-seq} splicing and coverage},
- url = {http://biorxiv.org/content/early/2015/05/07/019067.abstract},
- year = {2015}
- }
- @article{Hall2018,
- abstract = {Diabetes is an increasing problem worldwide; almost 30 million people, nearly 10{\%} of the population, in the United States are diagnosed with diabetes. Another 84 million are prediabetic, and without intervention, up to 70{\%} of these individuals may progress to type 2 diabetes. Current methods for quantifying blood glucose dysregulation in diabetes and prediabetes are limited by reliance on single-time-point measurements or on average measures of overall glycemia and neglect glucose dynamics. We have used continuous glucose monitoring (CGM) to evaluate the frequency with which individuals demonstrate elevations in postprandial glucose, the types of patterns, and how patterns vary between individuals given an identical nutrient challenge. Measurement of insulin resistance and secretion highlights the fact that the physiology underlying dysglycemia is highly variable between individuals. We developed an analytical framework that can group individuals according to specific patterns of glycemic responses called “glucotypes” that reveal heterogeneity, or subphenotypes, within traditional diagnostic categories of glucose regulation. Importantly, we found that even individuals considered normoglycemic by standard measures exhibit high glucose variability using CGM, with glucose levels reaching prediabetic and diabetic ranges 15{\%} and 2{\%} of the time, respectively. We thus show that glucose dysregulation, as characterized by CGM, is more prevalent and heterogeneous than previously thought and can affect individuals considered normoglycemic by standard measures, and specific patterns of glycemic responses reflect variable underlying physiology. The interindividual variability in glycemic responses to standardized meals also highlights the personal nature of glucose regulation. Through extensive phenotyping, we developed a model for identifying potential mechanisms of personal glucose dysregulation and built a webtool for visualizing a user-uploaded CGM profile and classifying individualized glucose patterns into glucotypes.},
- author = {Hall, Heather and Perelman, Dalia and Breschi, Alessandra and Limcaoco, Patricia and Kellogg, Ryan and McLaughlin, Tracey and Snyder, Michael},
- doi = {10.1371/journal.pbio.2005143},
- file = {:Users/ryan/Documents/Mendeley Desktop/Hall et al. - 2018 - Glucotypes reveal new patterns of glucose dysregulation.pdf:pdf},
- isbn = {1111111111},
- issn = {15457885},
- journal = {PLoS Biology},
- number = {7},
- pages = {1--23},
- title = {{Glucotypes reveal new patterns of glucose dysregulation}},
- url = {https://journals.plos.org/plosbiology/article/file?id=10.1371/journal.pbio.2005143{\&}type=printable},
- volume = {16},
- year = {2018}
- }
- @article{Li2018,
- abstract = {Background: Since the invention of next-generation RNA sequencing (RNA-seq) technologies, they have become a powerful tool to study the presence and quantity of RNA molecules in biological samples and have revolutionized transcriptomic studies. The analysis of RNA-seq data at four different levels (samples, genes, transcripts, and exons) involve multiple statistical and computational questions, some of which remain challenging up to date. Results: We review RNA-seq analysis tools at the sample, gene, transcript, and exon levels from a statistical perspective. We also highlight the biological and statistical questions of most practical considerations. Conclusion: The development of statistical and computational methods for analyzing RNA-seq data has made significant advances in the past decade. However, methods developed to answer the same biological question often rely on diverse statistical models and exhibit different performance under different scenarios. This review discusses and compares multiple commonly used statistical models regarding their assumptions, in the hope of helping users select appropriate methods as needed, as well as assisting developers for future method development.},
- archivePrefix = {arXiv},
- arxivId = {1804.06050},
- author = {Li, Wei Vivian and Li, Jingyi Jessica},
- eprint = {1804.06050},
- file = {:Users/ryan/Documents/Mendeley Desktop/Li, Li - 2018 - Modeling and analysis of RNA-seq data a review from a statistical perspective.pdf:pdf},
- pages = {1--26},
- title = {{Modeling and analysis of RNA-seq data: a review from a statistical perspective}},
- url = {http://arxiv.org/abs/1804.06050},
- year = {2018}
- }
- @article{McCall2014,
- abstract = {MOTIVATION: Quantitative real-time PCR (qPCR) is one of the most widely used methods to measure gene expression. Despite extensive research in qPCR laboratory protocols, normalization and statistical analysis, little attention has been given to qPCR non-detects---those reactions failing to produce a minimum amount of signal. RESULTS: We show that the common methods of handling qPCR non-detects lead to biased inference. Furthermore, we show that non-detects do not represent data missing completely at random and likely represent missing data occurring not at random. We propose a model of the missing data mechanism and develop a method to directly model non-detects as missing data. Finally, we show that our approach results in a sizeable reduction in bias when estimating both absolute and differential gene expression. Availability and implementation: The proposed algorithm is implemented in the R package, nondetects. This package also contains the raw data for the three example datasets used in this manuscript. The package is freely available at http://mnmccall.com/software and as part of the Bioconductor project. CONTACT: mccallm@gmail.com.},
- author = {McCall, Matthew N. and McMurray, Helene R. and Land, Hartmut and Almudevar, Anthony},
- doi = {10.1093/bioinformatics/btu239},
- file = {:Users/ryan/Documents/Mendeley Desktop/McCall et al. - 2014 - On non-detects in qPCR data.pdf:pdf},
- issn = {1367-4803},
- journal = {Bioinformatics},
- month = {aug},
- number = {16},
- pages = {2310--2316},
- pmid = {24764462},
- title = {{On non-detects in qPCR data}},
- url = {https://academic.oup.com/bioinformatics/article-lookup/doi/10.1093/bioinformatics/btu239},
- volume = {30},
- year = {2014}
- }
- @book{James2013,
- abstract = {Statistical learning refers to a set of tools for modeling and understanding complex datasets. It is a recently developed area in statistics and blends with parallel developments in computer science and, in particular, machine learning. The field encompasses many methods such as the lasso and sparse regression, classification and regression trees, and boosting and support vector machines.},
- address = {New York, NY},
- author = {James, Gareth and Witten, Daniela and Hastie, Trevor and Tibshirani, Robert},
- booktitle = {Springer Texts in Statistics},
- doi = {10.1007/978-1-4614-7138-7},
- edition = {6},
- file = {:Users/ryan/Documents/Mendeley Desktop/James et al. - 2013 - An Introduction to Statistical Learning.pdf:pdf},
- isbn = {978-1-4614-7137-0},
- issn = {1431-875X},
- pages = {1--426},
- publisher = {Springer New York},
- series = {Springer Texts in Statistics},
- title = {{An Introduction to Statistical Learning}},
- url = {http://link.springer.com/10.1007/978-1-4614-7138-7 http://www-bcf.usc.edu/{~}gareth/ISL/ http://www-bcf.usc.edu/{~}gareth/ISL/ISLR Sixth Printing.pdf},
- volume = {103},
- year = {2013}
- }
- @article{Mccall,
- author = {McCall, Matthew N and Bolstad, Benjamin M and Irizarry, Rafael A},
- file = {:Users/ryan/Documents/Mendeley Desktop/Mccall, Bolstad, Irizarry - Unknown - No Title.pdf:pdf},
- pages = {1--5},
- title = {{No Title}}
- }
- @article{Sarkar2017a,
- author = {Sarkar, Hirak and Patro, Rob},
- doi = {10.1093/bioinformatics/btx428},
- file = {:Users/ryan/Documents/Mendeley Desktop/Sarkar, Patro - 2017 - Quark enables semi-reference based compression of RNA-seq data.pdf:pdf},
- issn = {1367-4803},
- journal = {Bioinformatics},
- month = {jul},
- pages = {1--7},
- title = {{Quark enables semi-reference based compression of RNA-seq data}},
- url = {https://doi.org/10.1093/bioinformatics/btx428},
- year = {2017}
- }
- @article{Wu2013,
- abstract = {MOTIVATION: Although chromatin immunoprecipitation coupled with high-throughput sequencing (ChIP-seq) or tiling array hybridization (ChIP-chip) is increasingly used to map genome-wide-binding sites of transcription factors (TFs), it still remains difficult to generate a quality ChIPx (i.e. ChIP-seq or ChIP-chip) dataset because of the tremendous amount of effort required to develop effective antibodies and efficient protocols. Moreover, most laboratories are unable to easily obtain ChIPx data for one or more TF(s) in more than a handful of biological contexts. Thus, standard ChIPx analyses primarily focus on analyzing data from one experiment, and the discoveries are restricted to a specific biological context. RESULTS: We propose to enrich this existing data analysis paradigm by developing a novel approach, ChIP-PED, which superimposes ChIPx data on large amounts of publicly available human and mouse gene expression data containing a diverse collection of cell types, tissues and disease conditions to discover new biological contexts with potential TF regulatory activities. We demonstrate ChIP-PED using a number of examples, including a novel discovery that MYC, a human TF, plays an important functional role in pediatric Ewing sarcoma cell lines. These examples show that ChIP-PED increases the value of ChIPx data by allowing one to expand the scope of possible discoveries made from a ChIPx experiment. AVAILABILITY: http://www.biostat.jhsph.edu/{\~{}}gewu/ChIPPED/},
- author = {Wu, George and Yustein, Jason T. and McCall, Matthew N. and Zilliox, Michael and Irizarry, Rafael A. and Zeller, Karen and Dang, Chi V. and Ji, Hongkai},
- doi = {10.1093/bioinformatics/btt108},
- file = {:Users/ryan/Documents/Mendeley Desktop/Wu et al. - 2013 - ChIP-PED enhances the analysis of ChIP-seq and ChIP-chip data.pdf:pdf},
- isbn = {1367-4811 (Electronic); 1367-4803 (Linking)},
- issn = {1460-2059},
- journal = {Bioinformatics},
- number = {9},
- pages = {1182--1189},
- pmid = {23457041},
- title = {{ChIP-PED enhances the analysis of ChIP-seq and ChIP-chip data}},
- url = {https://academic.oup.com/bioinformatics/article-lookup/doi/10.1093/bioinformatics/btt108},
- volume = {29},
- year = {2013}
- }
- @article{Elzawahry2014,
- abstract = {Innate immune response involves protein-protein interactions, deoxyribonucleic acid (DNA)-protein interactions and signaling cascades. So far, thousands of protein-protein interactions have been curated as a static interaction map. However, protein-protein interactions involved in innate immune response are dynamic. We recorded the dynamics in the interactome during innate immune response by combining gene expression data of lipopolysaccharide (LPS)-stimulated dendritic cells with protein-protein interactions data. We identified the differences in interactome during innate immune response by constructing differential networks and identifying protein modules, which were up-/down-regulated at each stage during the innate immune response. For each protein complex, we identified enriched biological processes and pathways. In addition, we identified core interactions that are conserved throughout the innate immune response and their enriched gene ontology terms and pathways. We defined two novel measures to assess the differences between network maps at different time points. We found that the protein interaction network at 1 hour after LPS stimulation has the highest interactions protein ratio, which indicates a role for proteins with large number of interactions in innate immune response. A pairwise differential matrix allows for the global visualization of the differences between different networks. We investigated the toll-like receptor subnetwork and found that S100A8 is down-regulated in dendritic cells after LPS stimulation. Identified protein complexes have a crucial role not only in innate immunity, but also in circadian rhythms, pathways involved in cancer, and p53 pathways. The study confirmed previous work that reported a strong correlation between cancer and immunity.},
- author = {Elzawahry, Asmaa and Patil, Ashwini and Kumagai, Yutaro and Suzuki, Yutaka and Nakai, Kenta},
- doi = {10.4137/GRSB.S12850},
- file = {:Users/ryan/Documents/Mendeley Desktop/Elzawahry et al. - 2014 - Innate immunity interactome dynamics.pdf:pdf},
- issn = {1177-6250},
- journal = {Gene Regulation and Systems Biology},
- keywords = {differential networks,gene expression,innate immunity,interactome dynamics,protein-protein interactions},
- month = {jan},
- pages = {1--15},
- pmid = {24453478},
- title = {{Innate immunity interactome dynamics.}},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=3885269{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {8},
- year = {2014}
- }
- @article{Berman2010,
- abstract = {OBJECTIVE To test the graft-promoting effects of mesenchymal stem cells (MSCs) in a cynomolgus monkey model of islet/bone marrow transplantation. RESEARCH DESIGN AND METHODS Cynomolgus MSCs were obtained from iliac crest aspirate and characterized through passage 11 for phenotype, gene expression, differentiation potential, and karyotype. Allogeneic donor MSCs were cotransplanted intraportally with islets on postoperative day (POD) 0 and intravenously with donor marrow on PODs 5 and 11. Recipients were followed for stabilization of blood glucose levels, reduction of exogenous insulin requirement (EIR), C-peptide levels, changes in peripheral blood T regulatory cells, and chimerism. Destabilization of glycemia and increases in EIR were used as signs of rejection; additional intravenous MSCs were administered to test the effect on reversal of rejection. RESULTS MSC phenotype and a normal karyotype were observed through passage 11. IL-6, IL-10, vascular endothelial growth factor, TGF-$\beta$, hepatocyte growth factor, and galectin-1 gene expression levels varied among donors. MSC treatment significantly enhanced islet engraftment and function at 1 month posttransplant (n = 8), as compared with animals that received islets without MSCs (n = 3). Additional infusions of donor or third-party MSCs resulted in reversal of rejection episodes and prolongation of islet function in two animals. Stable islet allograft function was associated with increased numbers of regulatory T-cells in peripheral blood. CONCLUSIONS MSCs may provide an important approach for enhancement of islet engraftment, thereby decreasing the numbers of islets needed to achieve insulin independence. Furthermore, MSCs may serve as a new, safe, and effective antirejection therapy.},
- author = {Berman, Dora M. and Willman, Melissa A. and Han, Dongmei and Kleiner, Gary and Kenyon, Norman M. and Cabrera, Over and Karl, Julie A. and Wiseman, Roger W. and O'Connor, David H. and Bartholomew, Amelia M. and Kenyon, Norma S.},
- doi = {10.2337/db10-0136},
- file = {:Users/ryan/Documents/Mendeley Desktop/Berman et al. - 2010 - Mesenchymal stem cells enhance allogeneic islet engraftment in nonhuman primates.pdf:pdf},
- issn = {00121797},
- journal = {Diabetes},
- number = {10},
- pages = {2558--2568},
- title = {{Mesenchymal stem cells enhance allogeneic islet engraftment in nonhuman primates}},
- url = {https://diabetes.diabetesjournals.org/content/diabetes/59/10/2558.full.pdf},
- volume = {59},
- year = {2010}
- }
- @article{Breese2013,
- abstract = {SUMMARY: NGSUtils is a suite of software tools for manipulating data common to next-generation sequencing experiments, such as FASTQ, BED and BAM format files. These tools provide a stable and modular platform for data management and analysis. Availability and implementation: NGSUtils is available under a BSD license and works on Mac OS X and Linux systems. Python 2.6+ and virtualenv are required. More information and source code may be obtained from the website: http://ngsutils.org. CONTACT: yunliu@iupui.edu Supplemental information: Supplementary data are available at Bioinformatics online.},
- author = {Breese, Marcus R and Liu, Yunlong},
- doi = {10.1093/bioinformatics/bts731},
- file = {:Users/ryan/Documents/Mendeley Desktop/Breese, Liu - 2013 - NGSUtils a software suite for analyzing and manipulating next-generation sequencing datasets.pdf:pdf},
- issn = {1367-4811},
- journal = {Bioinformatics},
- month = {jan},
- number = {4},
- pages = {494--496},
- pmid = {23314324},
- title = {{NGSUtils: a software suite for analyzing and manipulating next-generation sequencing datasets.}},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/23314324},
- volume = {29},
- year = {2013}
- }
- @article{Ule2006,
- abstract = {Nova proteins are a neuron-specific alternative splicing factors. We have combined bioinformatics, biochemistry and genetics to derive an RNA map describing the rules by which Nova proteins regulate alternative splicing. This map revealed that the position of Nova binding sites (YCAY clusters) in a pre-messenger RNA determines the outcome of splicing. The map correctly predicted Nova's effect to inhibit or enhance exon inclusion, which led us to examine the relationship between the map and Nova's mechanism of action. Nova binding to an exonic YCAY cluster changed the protein complexes assembled on pre-mRNA, blocking U1 snRNP (small nuclear ribonucleoprotein) binding and exon inclusion, whereas Nova binding to an intronic YCAY cluster enhanced spliceosome assembly and exon inclusion. Assays of splicing intermediates of Nova-regulated transcripts in mouse brain revealed that Nova preferentially regulates removal of introns harbouring (or closest to) YCAY clusters. These results define a genome-wide map relating the position of a cis-acting element to its regulation by an RNA binding protein, namely that Nova binding to YCAY clusters results in a local and asymmetric action to regulate spliceosome assembly and alternative splicing in neurons.},
- author = {Ule, Jernej and Stefani, Giovanni and Mele, Aldo and Ruggiu, Matteo and Wang, Xuning and Taneri, Bahar and Gaasterland, Terry and Blencowe, Benjamin J and Darnell, Robert B},
- doi = {10.1038/nature05304},
- file = {:Users/ryan/Documents/Mendeley Desktop/Ule et al. - 2006 - An RNA map predicting Nova-dependent splicing regulation.pdf:pdf},
- issn = {1476-4687},
- journal = {Nature},
- keywords = {Alternative Splicing,Alternative Splicing: physiology,Animals,Antigens,GABA-A,GABA-A: genetics,Humans,Introns,Mice,Neoplasm,Neoplasm: physiology,Nerve Tissue Proteins,Nerve Tissue Proteins: physiology,Nucleic Acid Conformation,Protein Binding,RNA,RNA Precursors,RNA Precursors: chemistry,RNA Precursors: metabolism,RNA-Binding Proteins,RNA-Binding Proteins: physiology,RNA: chemistry,RNA: physiology,Receptors,Ribonucleoprotein,U1 Small Nuclear,U1 Small Nuclear: antagonists {\&},U1 Small Nuclear: metabolism},
- month = {nov},
- number = {7119},
- pages = {580--586},
- pmid = {17065982},
- title = {{An RNA map predicting Nova-dependent splicing regulation.}},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/17065982},
- volume = {444},
- year = {2006}
- }
- @article{Pelz2008,
- abstract = {Microarray technology has become very popular for globally evaluating gene expression in biological samples. However, non-linear variation associated with the technology can make data interpretation unreliable. Therefore, methods to correct this kind of technical variation are critical. Here we consider a method to reduce this type of variation applied after three common procedures for processing microarray data: MAS 5.0, RMA, and dChip.},
- author = {Pelz, Carl R and Kulesz-Martin, Molly and Bagby, Grover and Sears, Rosalie C},
- doi = {10.1186/1471-2105-9-520},
- file = {:Users/ryan/Documents/Mendeley Desktop/Pelz et al. - 2008 - Global rank-invariant set normalization (GRSN) to reduce systematic distortions in microarray data.pdf:pdf},
- issn = {1471-2105},
- journal = {BMC Bioinformatics},
- keywords = {Analysis of Variance,Artifacts,Computational Biology,Computational Biology: methods,Computer Simulation,Data Interpretation, Statistical,Databases, Genetic,Gene Expression Profiling,Gene Expression Regulation,Models, Genetic,Oligonucleotide Array Sequence Analysis,Reproducibility of Results,Signal Transduction},
- month = {jan},
- pages = {520},
- pmid = {19055840},
- title = {{Global rank-invariant set normalization (GRSN) to reduce systematic distortions in microarray data.}},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=2644708{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {9},
- year = {2008}
- }
- @article{Madrigal2016,
- abstract = {Computational evaluation of variability across DNA or RNA sequencing datasets is a crucial step in genomic science, as it allows both to evaluate the reproducibility across biological or technical replicates, and to compare different datasets to identify their potential correlations. Here I present fCCAC, an application of functional canonical correlation analysis to assess covariance of nucleic acid sequencing datasets such as chromatin immunoprecipitation followed by deep sequencing (ChIP-seq). I exemplify how this method can reveal shared covariance between histone modifications and DNA binding proteins, such as the relationship between the H3K4me3 chromatin mark and its epigenetic writers and readers. R code is publicly available at http://github.com/pmb59/fCCAC/.},
- author = {Madrigal, Pedro},
- doi = {10.1101/060780},
- file = {:Users/ryan/Documents/Mendeley Desktop/Madrigal - 2016 - fCCAC functional canonical correlation analysis to evaluate covariance between nucleic acid sequencing datasets.pdf:pdf},
- journal = {bioRxiv},
- pages = {060780},
- title = {{fCCAC: functional canonical correlation analysis to evaluate covariance between nucleic acid sequencing datasets}},
- url = {http://biorxiv.org/content/biorxiv/early/2016/06/27/060780.full.pdf},
- year = {2016}
- }
- @article{Ferreira2013,
- author = {Ferreira, P. G. and Jares, P. and Rico, D. and Gomez-Lopez, G. and Martinez-Trillos, A. and Villamor, N. and Ecker, S. and Gonzalez-Perez, A. and Knowles, D. G. and Monlong, J. and Johnson, R. and Quesada, V. and Gouin, A. and Djebali, S. and Lopez-Guerra, M. and Colomer, D. and Royo, C. and Cazorla, M. and Pinyol, M. and Clot, G. and Aymerich, M. and Rozman, M. and Kulis, M. and Tamborero, D. and Papasaikas, P. and Blanc, J. and Gut, M. and Gut, I. and Puente, X. S. and Pisano, D. G. and Martin-Subero, J. I. and Lopez-Bigas, N. and Lopez-Guillermo, A. and Valencia, A. and Lopez-Otin, C. and Campo, E. and Guigo, R.},
- doi = {10.1101/gr.152132.112},
- file = {:Users/ryan/Documents/Mendeley Desktop/Ferreira et al. - 2013 - Transcriptome characterization by RNA sequencing identifies a major molecular and clinical subdivision in chron.pdf:pdf},
- issn = {1088-9051},
- journal = {Genome Research},
- month = {nov},
- title = {{Transcriptome characterization by RNA sequencing identifies a major molecular and clinical subdivision in chronic lymphocytic leukemia}},
- url = {http://genome.cshlp.org/cgi/doi/10.1101/gr.152132.112},
- year = {2013}
- }
- @article{Cline2007a,
- abstract = {Cytoscape is a free software package for visualizing, modeling and analyzing molecular and genetic interaction networks. This protocol explains how to use Cytoscape to analyze the results of mRNA expression profiling, and other functional genomics and proteomics experiments, in the context of an interaction network obtained for genes of interest. Five major steps are described: (i) obtaining a gene or protein network, (ii) displaying the network using layout algorithms, (iii) integrating with gene expression and other functional attributes, (iv) identifying putative complexes and functional modules and (v) identifying enriched Gene Ontology annotations in the network. These steps provide a broad sample of the types of analyses performed by Cytoscape.},
- author = {Cline, Melissa S and Smoot, Michael and Cerami, Ethan and Kuchinsky, Allan and Landys, Nerius and Workman, Chris and Christmas, Rowan and Avila-Campilo, Iliana and Creech, Michael and Gross, Benjamin and Hanspers, Kristina and Isserlin, Ruth and Kelley, Ryan and Killcoyne, Sarah and Lotia, Samad and Maere, Steven and Morris, John and Ono, Keiichiro and Pavlovic, Vuk and Pico, Alexander R and Vailaya, Aditya and Wang, Peng-Liang and Adler, Annette and Conklin, Bruce R and Hood, Leroy and Kuiper, Martin and Sander, Chris and Schmulevich, Ilya and Schwikowski, Benno and Warner, Guy J and Ideker, Trey and Bader, Gary D},
- doi = {10.1038/nprot.2007.324},
- file = {:Users/ryan/Documents/Mendeley Desktop/Cline et al. - 2007 - Integration of biological networks and gene expression data using Cytoscape.pdf:pdf},
- issn = {1750-2799},
- journal = {Nature Protocols},
- keywords = {Computational Biology,Computational Biology: methods,Gene Expression Profiling,Gene Expression Profiling: methods,Gene Regulatory Networks,Genomics,Genomics: methods,Proteomics,Proteomics: methods,RNA, Messenger,RNA, Messenger: metabolism,Software},
- month = {jan},
- number = {10},
- pages = {2366--2382},
- pmid = {17947979},
- title = {{Integration of biological networks and gene expression data using Cytoscape.}},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=3685583{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {2},
- year = {2007}
- }
- @article{Bi2013,
- abstract = {BACKGROUND: RNA-seq, a massive parallel-sequencing-based transcriptome profiling method, provides digital data in the form of aligned sequence read counts. The comparative analyses of the data require appropriate statistical methods to estimate the differential expression of transcript variants across different cell/tissue types and disease conditions.
- RESULTS: We developed a novel nonparametric empirical Bayesian-based approach (NPEBseq) to model the RNA-seq data. The prior distribution of the Bayesian model is empirically estimated from the data without any parametric assumption, and hence the method is "nonparametric" in nature. Based on this model, we proposed a method for detecting differentially expressed genes across different conditions. We also extended this method to detect differential usage of exons from RNA-seq data. The evaluation of NPEBseq on both simulated and publicly available RNA-seq datasets and comparison with three popular methods showed improved results for experiments with or without biological replicates.
- CONCLUSIONS: NPEBseq can successfully detect differential expression between different conditions not only at gene level but also at exon level from RNA-seq datasets. In addition, NPEBSeq performs significantly better than current methods and can be applied to genome-wide RNA-seq datasets. Sample datasets and R package are available at http://bioinformatics.wistar.upenn.edu/NPEBseq.},
- author = {Bi, Yingtao and Davuluri, Ramana V},
- doi = {10.1186/1471-2105-14-262},
- file = {:Users/ryan/Documents/Mendeley Desktop/Bi, Davuluri - 2013 - NPEBseq nonparametric empirical bayesian-based procedure for differential expression analysis of RNA-seq data.pdf:pdf},
- issn = {1471-2105},
- journal = {BMC Bioinformatics},
- month = {aug},
- number = {1},
- pages = {262},
- pmid = {23981227},
- publisher = {BMC Bioinformatics},
- title = {{NPEBseq: nonparametric empirical bayesian-based procedure for differential expression analysis of RNA-seq data.}},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=3765716{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {14},
- year = {2013}
- }
- @article{Kharchenko2008,
- author = {Kharchenko, Peter V. and Tolstorukov, Michael Y. and Park, Peter J.},
- doi = {10.1038/nbt.1508},
- file = {:Users/ryan/Documents/Mendeley Desktop/Kharchenko, Tolstorukov - 2008 - Design and analysis of ChIP-seq experiments for DNA-binding proteins.pdf:pdf},
- journal = {Nature Biotechnology},
- keywords = {SPP},
- mendeley-tags = {SPP},
- number = {12},
- pages = {1351--1359},
- title = {{Design and analysis of ChIP-seq experiments for DNA-binding proteins}},
- url = {http://www.nature.com/nbt/journal/v26/n12/abs/nbt.1508.html http://compbio.med.harvard.edu/Supplements/ChIP-seq/tutorial.html},
- volume = {26},
- year = {2008}
- }
- @article{Luco2010,
- abstract = {Alternative splicing of pre-mRNA is a prominent mechanism to generate protein diversity, yet its regulation is poorly understood. We demonstrated a direct role for histone modifications in alternative splicing. We found distinctive histone modification signatures that correlate with the splicing outcome in a set of human genes, and modulation of histone modifications causes splice site switching. Histone marks affect splicing outcome by influencing the recruitment of splicing regulators via a chromatin-binding protein. These results outline an adaptor system for the reading of histone marks by the pre-mRNA splicing machinery.},
- author = {Luco, Reini F and Pan, Qun and Tominaga, Kaoru and Blencowe, Benjamin J and Pereira-Smith, Olivia M and Misteli, Tom},
- doi = {10.1126/science.1184208},
- file = {:Users/ryan/Documents/Mendeley Desktop/Luco et al. - 2010 - Regulation of alternative splicing by histone modifications.pdf:pdf;:Users/ryan/Documents/Mendeley Desktop/Luco et al. - 2010 - Regulation of alternative splicing by histone modifications(2).pdf:pdf},
- issn = {1095-9203},
- journal = {Science},
- keywords = {Alternative Splicing,Cell Line,Chromatin,Chromatin: metabolism,Epithelial Cells,Epithelial Cells: metabolism,Exons,Fibroblast Growth Factor,Histone-Lysine N-Methyltransferase,Histone-Lysine N-Methyltransferase: genetics,Histone-Lysine N-Methyltransferase: metabolism,Histones,Histones: metabolism,Humans,Male,Mesenchymal Stem Cells,Mesenchymal Stem Cells: metabolism,Polypyrimidine Tract-Binding Protein,Polypyrimidine Tract-Binding Protein: metabolism,Prostate,Prostate: cytology,Protein Binding,RNA Precursors,RNA Precursors: metabolism,Receptor,Transcription Factors,Transcription Factors: genetics,Transcription Factors: metabolism,Type 2,Type 2: genetics},
- month = {feb},
- number = {5968},
- pages = {996--1000},
- pmid = {20133523},
- title = {{Regulation of alternative splicing by histone modifications.}},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=2913848{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {327},
- year = {2010}
- }
- @article{Storey2002,
- abstract = {Multiple-hypothesis testing involves guarding against much more complicated errors than single-hypothesis testing. Whereas we typically control the type I error rate for a single-hypothesis test, a compound error rate is controlled for multiple-hypothesis tests. For example, controlling the false discovery rate FDR traditionally involves intricate sequential p-value rejection methods based on the observed data. Whereas a sequential p-value method fixes the error rate and estimates its corresponding rejection region, we propose the opposite approach—we fix the rejection region and then estimate its corresponding error rate. This new approach offers increased applicability, accuracy and power. We apply the methodology to both the positive false discovery rate pFDR and FDR, and provide evidence for its benefits. It is shown that pFDR is probably the quantity of interest over FDR. Also discussed is the calculation of the q-value, the pFDR analogue of the p-value, which eliminates the need to set the error rate beforehand as is traditionally done. Some simple numerical examples are presented that show that this new approach can yield an increase of over eight times in power compared with the Benjamini–Hochberg FDR method.},
- author = {Storey, John D.},
- doi = {10.1111/1467-9868.00346},
- file = {:Users/ryan/Documents/Mendeley Desktop/Storey - 2002 - A direct approach to false discovery rates.pdf:pdf},
- isbn = {9781405122382},
- issn = {13697412},
- journal = {Journal of the Royal Statistical Society: Series B (Statistical Methodology)},
- keywords = {false discovery rate,multiple comparisons,p-values,positive false discovery rate,q-values,sequential p-value methods,simultaneous inference},
- month = {aug},
- number = {3},
- pages = {479--498},
- pmid = {11917092},
- title = {{A direct approach to false discovery rates}},
- url = {http://www.genomine.org/papers/directfdr.pdf},
- volume = {64},
- year = {2002}
- }
- @article{Anders2012,
- author = {Anders, Simon and Reyes, Alejandro and Huber, Wolfgang},
- file = {:Users/ryan/Documents/Mendeley Desktop/Anders, Reyes, Huber - 2012 - Detecting differential usage of exons from RNA-seq data.pdf:pdf},
- journal = {Genome Research},
- number = {10},
- pages = {2008--2017},
- title = {{Detecting differential usage of exons from RNA-seq data}},
- url = {http://genome.cshlp.org/content/22/10/2008.short},
- volume = {22},
- year = {2012}
- }
- @article{Dorff2013,
- abstract = {We present GobyWeb, a web-based system that facilitates the management and analysis of high-throughput sequencing (HTS) projects. The software provides integrated support for a broad set of HTS analyses and offers a simple plugin extension mechanism. Analyses currently supported include quantification of gene expression for messenger and small RNA sequencing, estimation of DNA methylation (i.e., reduced bisulfite sequencing and whole genome methyl-seq), or the detection of pathogens in sequenced data. In contrast to previous analysis pipelines developed for analysis of HTS data, GobyWeb requires significantly less storage space, runs analyses efficiently on a parallel grid, scales gracefully to process tens or hundreds of multi-gigabyte samples, yet can be used effectively by researchers who are comfortable using a web browser. We conducted performance evaluations of the software and found it to either outperform or have similar performance to analysis programs developed for specialized analyses of HTS data. We found that most biologists who took a one-hour GobyWeb training session were readily able to analyze RNA-Seq data with state of the art analysis tools. GobyWeb can be obtained at http://gobyweb.campagnelab.org and is freely available for non-commercial use. GobyWeb plugins are distributed in source code and licensed under the open source LGPL3 license to facilitate code inspection, reuse and independent extensions http://github.com/CampagneLaboratory/gobyweb2-plugins.},
- author = {Dorff, Kevin C and Chambwe, Nyasha and Zeno, Zachary and Simi, Manuele and Shaknovich, Rita and Campagne, Fabien},
- doi = {10.1371/journal.pone.0069666},
- file = {:Users/ryan/Documents/Mendeley Desktop/Dorff et al. - 2013 - GobyWeb Simplified Management and Analysis of Gene Expression and DNA Methylation Sequencing Data.pdf:pdf},
- issn = {1932-6203},
- journal = {PLoS ONE},
- month = {jan},
- number = {7},
- pages = {e69666},
- pmid = {23936070},
- title = {{GobyWeb: Simplified Management and Analysis of Gene Expression and DNA Methylation Sequencing Data.}},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=3720652{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {8},
- year = {2013}
- }
- @article{Quail2012,
- author = {Quail, Michael and Smith, Miriam E and Coupland, Paul and Otto, Thomas D and Harris, Simon R and Connor, Thomas R and Bertoni, Anna and Swerdlow, Harold P and Gu, Yong},
- doi = {10.1186/1471-2164-13-341},
- file = {:Users/ryan/Documents/Mendeley Desktop/Quail et al. - 2012 - A tale of three next generation sequencing platforms comparison of Ion torrent, pacific biosciences and illumina M.pdf:pdf},
- issn = {1471-2164},
- journal = {BMC Genomics},
- number = {1},
- pages = {341},
- title = {{A tale of three next generation sequencing platforms: comparison of Ion torrent, pacific biosciences and illumina MiSeq sequencers}},
- url = {http://www.biomedcentral.com/1471-2164/13/341},
- volume = {13},
- year = {2012}
- }
- @article{Emig2010,
- abstract = {Alternative splicing is an important mechanism for increasing protein diversity. However, its functional effects are largely unknown. Here, we present our new software workflow composed of the open-source application AltAnalyze and the Cytoscape plugin DomainGraph. Both programs provide an intuitive and comprehensive end-to-end solution for the analysis and visualization of alternative splicing data from Affymetrix Exon and Gene Arrays at the level of proteins, domains, microRNA binding sites, molecular interactions and pathways. Our software tools include easy-to-use graphical user interfaces, rigorous statistical methods (FIRMA, MiDAS and DABG filtering) and do not require prior knowledge of exon array analysis or programming. They provide new methods for automatic interpretation and visualization of the effects of alternative exon inclusion on protein domain composition and microRNA binding sites. These data can be visualized together with affected pathways and gene or protein interaction networks, allowing a straightforward identification of potential biological effects due to alternative splicing at different levels of granularity. Our programs are available at http://www.altanalyze.org and http://www.domaingraph.de. These websites also include extensive documentation, tutorials and sample data.},
- author = {Emig, Dorothea and Salomonis, Nathan and Baumbach, Jan and Lengauer, Thomas and Conklin, Bruce R and Albrecht, Mario},
- doi = {10.1093/nar/gkq405},
- file = {:Users/ryan/Documents/Mendeley Desktop/Emig et al. - 2010 - AltAnalyze and DomainGraph analyzing and visualizing exon expression data.pdf:pdf},
- issn = {1362-4962},
- journal = {Nucleic Acids Research},
- keywords = {Alternative Splicing,Animals,Computer Graphics,Exons,Gene Expression Profiling,Humans,Internet,Mice,Rats,Software},
- month = {jul},
- number = {Web Server issue},
- pages = {W755--W762},
- pmid = {20513647},
- title = {{AltAnalyze and DomainGraph: analyzing and visualizing exon expression data.}},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=2896198{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {38},
- year = {2010}
- }
- @article{Delhomme2012,
- abstract = {RNA sequencing is becoming a standard for expression profiling experiments and many tools have been developed in the past few years to analyze RNA-Seq data. Numerous 'Bioconductor' packages are available for next-generation sequencing data loading in R, e.g. ShortRead and Rsamtools as well as to perform differential gene expression analyses, e.g. DESeq and edgeR. However, the processing tasks lying in between these require the precise interplay of many Bioconductor packages, e.g. Biostrings, IRanges or external solutions are to be sought.},
- author = {Delhomme, Nicolas and Padioleau, Isma{\"{e}}l and Furlong, Eileen E and Steinmetz, Lars M},
- doi = {10.1093/bioinformatics/bts477},
- file = {:Users/ryan/Documents/Mendeley Desktop/Delhomme et al. - 2012 - easyRNASeq a bioconductor package for processing RNA-Seq data.pdf:pdf},
- issn = {1367-4811},
- journal = {Bioinformatics},
- month = {oct},
- number = {19},
- pages = {2532--2533},
- pmid = {22847932},
- title = {{easyRNASeq: a bioconductor package for processing RNA-Seq data.}},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=3463124{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {28},
- year = {2012}
- }
- @article{Taudt2016,
- abstract = {Post-translational modifications of histone residue tails are an important component of genome regulation. It is becoming increasingly clear that the combinatorial presence and absence of various modifications define discrete chromatin states which determine the functional properties of a locus. An emerging experimental goal is to compare genome-wide chromatin state maps across different conditions, such as experimental treatments, cell-types or developmental time points. Here we present chromstaR, an algorithm for the computational inference of combinatorial chromatin state dynamics across an arbitrary number of conditions. ChromstaR uses a multivariate Hidden Markov Model to assign every genomic region to a discrete combinatorial chromatin state based on the presence/absence of each modification in every condition. This interpretation makes it easy to relate the inferred chromatin states back to the underlying histone modification patterns. Moreover, the algorithm computes the number of combinatorial chromatin states that are present in the genome without having to specify them a priori, thus providing an unbiased picture of their genome-wide frequencies. We demonstrate the advantages of chromstaR in the context of three common experimental data scenarios. First, we study how different histone modifications combine to form combinatorial chromatin states in a single tissue. Second, we infer genome-wide patterns of combinatorial state differences between two cell types or conditions. Finally, we study the dynamics of combinatorial chromatin states during tissue differentiation involving up to six differentiation points. chromstaR is a versatile computational tool that facilitates a deeper biological understanding of chromatin organization and dynamics. The algorithm is written in C++ and freely available as an R-package at https://github.com/ataudt/chromstaR.},
- author = {Taudt, Aaron and Nguyen, Minh Anh and Heinig, Matthias and Johannes, Frank and Colome-Tatche, Maria},
- doi = {10.1101/038612},
- file = {:Users/ryan/Documents/Mendeley Desktop/Taudt et al. - 2016 - chromstaR Tracking combinatorial chromatin state dynamics in space and time.pdf:pdf},
- journal = {bioRxiv},
- keywords = {chip-seq,chromatin state map,computational biology,epigenetics,histone modification},
- pages = {038612},
- title = {{chromstaR: Tracking combinatorial chromatin state dynamics in space and time}},
- url = {http://biorxiv.org/content/early/2016/02/04/038612.abstract},
- year = {2016}
- }
- @article{Bonafede2014,
- archivePrefix = {arXiv},
- arxivId = {1410.8093v2},
- author = {Bonafede, Elisabetta and Picard, Franck and Robin, St{\'{e}}phane and Viroli, Cinzia},
- doi = {10.1111/biom.12458},
- eprint = {1410.8093v2},
- file = {:Users/ryan/Documents/Mendeley Desktop/Bonafede et al. - 2014 - Modelling overdispersion heterogeneity in differential expression analysis using mixtures.pdf:pdf},
- issn = {0006341X},
- journal = {Biometrics},
- keywords = {hypothesis testing,mixture models,rna-seq data},
- pages = {1--22},
- title = {{Modelling overdispersion heterogeneity in differential expression analysis using mixtures}},
- url = {http://arxiv.org/pdf/1410.8093v2.pdf},
- year = {2014}
- }
- @article{Goecks2010,
- abstract = {Increased reliance on computational approaches in the life sciences has revealed grave concerns about how accessible and reproducible computation-reliant results truly are. Galaxy http://usegalaxy.org, an open web-based platform for genomic research, addresses these problems. Galaxy automatically tracks and manages data provenance and provides support for capturing the context and intent of computational methods. Galaxy Pages are interactive, web-based documents that provide users with a medium to communicate a complete computational analysis.},
- author = {Goecks, Jeremy and Nekrutenko, Anton and Taylor, James},
- doi = {10.1186/gb-2010-11-8-r86},
- file = {:Users/ryan/Documents/Mendeley Desktop/Goecks, Nekrutenko, Taylor - 2010 - Galaxy a comprehensive approach for supporting accessible, reproducible, and transparent computation.pdf:pdf},
- issn = {1465-6914},
- journal = {Genome Biology},
- keywords = {Algorithms,Animals,Computational Biology,Computational Biology: methods,Databases, Nucleic Acid,Genomics,Genomics: methods,Humans,Internet},
- month = {jan},
- number = {8},
- pages = {R86},
- pmid = {20738864},
- title = {{Galaxy: a comprehensive approach for supporting accessible, reproducible, and transparent computational research in the life sciences.}},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=2945788{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {11},
- year = {2010}
- }
- @article{Leng2013,
- author = {Leng, Ning and Dawson, John A. and Thomson, James A. and others},
- file = {:Users/ryan/Documents/Mendeley Desktop/Leng, Dawson, Thomson - 2013 - EBSeq An empirical Bayes hierarchical model for inference in RNA-seq experiments.pdf:pdf},
- journal = {Bioinformatics},
- title = {{EBSeq: An empirical Bayes hierarchical model for inference in RNA-seq experiments}},
- url = {http://bioinformatics.oxfordjournals.org/content/early/2013/02/21/bioinformatics.btt087.short},
- year = {2013}
- }
- @article{Efron1997,
- abstract = {A study investigates the error rate of a rule for predicting future responses constructed from a training set of data. Results are nonparametric and apply to any possible prediction rule.},
- author = {Efron, B. and Tibshirani, R.},
- doi = {10.1080/01621459.1997.10474007},
- file = {:Users/ryan/Documents/Mendeley Desktop/Efron, Tibshirani - 1997 - Improvements on cross-validation The .632 plus bootstrap method.pdf:pdf},
- isbn = {0162-1459},
- issn = {0162-1459},
- journal = {Journal of the American Statistical Association},
- keywords = {classification,cross-validation bootstrap,prediction rule},
- number = {438},
- pages = {548--560},
- pmid = {370},
- title = {{Improvements on cross-validation: The .632 plus bootstrap method}},
- url = {http://www.stat.washington.edu/courses/stat527/s13/readings/EfronTibshirani{\_}JASA{\_}1997.pdf},
- volume = {92},
- year = {1997}
- }
- @article{Lund2012,
- abstract = {Next generation sequencing technology provides a powerful tool for measuring gene expression (mRNA) levels in the form of RNA-sequence data. Method development for identifying differentially expressed (DE) genes from RNA-seq data, which frequently includes many low-count integers and can exhibit severe overdispersion relative to Poisson or binomial distributions, is a popular area of ongoing research. Here we present quasi-likelihood methods with shrunken dispersion estimates based on an adaptation of Smyth's (2004) approach to estimating gene-specific error variances for microarray data. Our suggested methods are computationally simple, analogous to ANOVA and compare favorably versus competing methods in detecting DE genes and estimating false discovery rates across a variety of simulations based on real data.},
- author = {Lund, Steven P and Nettleton, Dan and McCarthy, Davis J and Smyth, Gordon K},
- doi = {10.1515/1544-6115.1826},
- file = {:Users/ryan/Documents/Mendeley Desktop/Lund et al. - 2012 - Detecting differential expression in RNA-sequence data using quasi-likelihood with shrunken dispersion estimates.pdf:pdf},
- issn = {1544-6115},
- journal = {Statistical Applications in Genetics and Molecular Biology},
- keywords = {Base Sequence,Databases,Gene Expression Profiling,Gene Expression Profiling: methods,Gene Expression Profiling: statistics {\&} numerical,Genetic,Likelihood Functions,Messenger,Messenger: metabolism,RNA,RNA: methods,Sequence Analysis},
- month = {jan},
- number = {5},
- pmid = {23104842},
- title = {{Detecting differential expression in RNA-sequence data using quasi-likelihood with shrunken dispersion estimates.}},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/23104842},
- volume = {11},
- year = {2012}
- }
- @article{Popendorf2010,
- abstract = {With the number of available genome sequences increasing rapidly, the magnitude of sequence data required for multiple-genome analyses is a challenging problem. When large-scale rearrangements break the collinearity of gene orders among genomes, genome comparison algorithms must first identify sets of short well-conserved sequences present in each genome, termed anchors. Previously, anchor identification among multiple genomes has been achieved using pairwise alignment tools like BLASTZ through progressive alignment tools like TBA, but the computational requirements for sequence comparisons of multiple genomes quickly becomes a limiting factor as the number and scale of genomes grows.},
- author = {Popendorf, Kris and Tsuyoshi, Hachiya and Osana, Yasunori and Sakakibara, Yasubumi},
- doi = {10.1371/journal.pone.0012651},
- file = {:Users/ryan/Documents/Mendeley Desktop/Popendorf et al. - 2010 - Murasaki a fast, parallelizable algorithm to find anchors from multiple genomes.pdf:pdf},
- issn = {1932-6203},
- journal = {PLoS ONE},
- keywords = {Algorithms,Animals,Bacteria,Bacteria: chemistry,Bacteria: genetics,Cattle,Conserved Sequence,Dogs,Genome,Humans,Mammals,Mammals: genetics,Mice,Rats,Sequence Alignment,Sequence Alignment: methods},
- month = {jan},
- number = {9},
- pages = {e12651},
- pmid = {20885980},
- title = {{Murasaki: a fast, parallelizable algorithm to find anchors from multiple genomes.}},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=2945767{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {5},
- year = {2010}
- }
- @article{Ramachandran2013,
- author = {Ramachandran, Parameswaran and Perkins, Theodore J},
- doi = {10.1186/1753-6561-7-S7-S7},
- file = {:Users/ryan/Documents/Mendeley Desktop/Ramachandran, Perkins - 2013 - Adaptive bandwidth kernel density estimation for next-generation sequencing data.pdf:pdf},
- issn = {1753-6561},
- journal = {BMC Proceedings},
- number = {Suppl 7},
- pages = {S7},
- publisher = {BioMed Central Ltd},
- title = {{Adaptive bandwidth kernel density estimation for next-generation sequencing data}},
- url = {http://www.biomedcentral.com/1753-6561/7/S7/S7},
- volume = {7},
- year = {2013}
- }
- @article{Li2012,
- abstract = {We discuss the identification of genes that are associated with an outcome in RNA sequencing and other sequence-based comparative genomic experiments. RNA-sequencing data take the form of counts, so models based on the Gaussian distribution are unsuitable. Moreover, normalization is challenging because different sequencing experiments may generate quite different total numbers of reads. To overcome these difficulties, we use a log-linear model with a new approach to normalization. We derive a novel procedure to estimate the false discovery rate (FDR). Our method can be applied to data with quantitative, two-class, or multiple-class outcomes, and the computation is fast even for large data sets. We study the accuracy of our approaches for significance calculation and FDR estimation, and we demonstrate that our method has potential advantages over existing methods that are based on a Poisson or negative binomial model. In summary, this work provides a pipeline for the significance analysis of sequencing data.},
- author = {Li, Jun and Witten, Daniela M and Johnstone, Iain M and Tibshirani, Robert},
- doi = {10.1093/biostatistics/kxr031},
- file = {:Users/ryan/Documents/Mendeley Desktop/Li et al. - 2012 - Normalization, testing, and false discovery rate estimation for RNA-sequencing data.pdf:pdf},
- issn = {1468-4357},
- journal = {Biostatistics},
- keywords = {Data Interpretation, Statistical,Humans,Models, Statistical,RNA, Messenger,RNA, Messenger: chemistry,RNA, Messenger: genetics,Reverse Transcriptase Polymerase Chain Reaction,Sequence Analysis, DNA,Sequence Analysis, DNA: methods},
- month = {jul},
- number = {3},
- pages = {523--538},
- pmid = {22003245},
- title = {{Normalization, testing, and false discovery rate estimation for RNA-sequencing data.}},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/22003245},
- volume = {13},
- year = {2012}
- }
- @article{Hardcastle2010,
- abstract = {High throughput sequencing has become an important technology for studying expression levels in many types of genomic, and particularly transcriptomic, data. One key way of analysing such data is to look for elements of the data which display particular patterns of differential expression in order to take these forward for further analysis and validation.},
- author = {Hardcastle, Thomas J and Kelly, Krystyna A},
- doi = {10.1186/1471-2105-11-422},
- file = {:Users/ryan/Documents/Mendeley Desktop/Hardcastle, Kelly - 2010 - baySeq empirical Bayesian methods for identifying differential expression in sequence count data.pdf:pdf},
- issn = {1471-2105},
- journal = {BMC Bioinformatics},
- keywords = {Algorithms,Arabidopsis,Arabidopsis: genetics,Base Sequence,Bayes Theorem,Gene Expression Profiling,Gene Expression Profiling: methods,RNA, Plant,RNA, Plant: genetics,Research Design},
- month = {jan},
- pages = {422},
- pmid = {20698981},
- title = {{baySeq: empirical Bayesian methods for identifying differential expression in sequence count data.}},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=2928208{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {11},
- year = {2010}
- }
- @article{Russo2014,
- abstract = {UNLABELLED: We present RNASeqGUI R package, a graphical user interface (GUI) for the identification of differentially expressed genes across multiple biological conditions. This R package includes some well-known RNA-Seq tools, available at www.bioconductor.org. RNASeqGUI package is not just a collection of some known methods and functions, but it is designed to guide the user during the entire analysis process. RNASeqGUI package is mainly addressed to those users who have little experience with command-line software. Therefore, thanks to RNASeqGUI, they can conduct analogous analyses using this simple graphical interface. Moreover, RNASeqGUI is also helpful for those who are expert R-users because it speeds up the usage of the included RNASeq methods drastically. AVAILABILITY AND IMPLEMENTATION: RNASeqGUI package needs the RGTK2 graphical library to run. This package is open source and is freely available under General Public License at http://bioinfo.na.iac.cnr.it/RNASeqGUI/Download. SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.},
- author = {Russo, Francesco and Angelini, Claudia},
- doi = {10.1093/bioinformatics/btu308},
- file = {:Users/ryan/Documents/Mendeley Desktop/Russo, Angelini - 2014 - RNASeqGUI A GUI for analysing RNA-Seq data.pdf:pdf},
- isbn = {1367-4811 (Electronic)$\backslash$r1367-4803 (Linking)},
- issn = {14602059},
- journal = {Bioinformatics},
- number = {17},
- pages = {2514--2516},
- pmid = {24812338},
- title = {{RNASeqGUI: A GUI for analysing RNA-Seq data}},
- url = {http://bioinformatics.oxfordjournals.org/content/30/17/2514.full.pdf},
- volume = {30},
- year = {2014}
- }
- @article{Argelaguet2018,
- abstract = {Multi-omics studies promise the improved characterization of biological processes across molecular layers. However, methods for the unsupervised integration of the resulting heterogeneous data sets are lacking. We present Multi-Omics Factor Analysis (MOFA), a computational method for discovering the principal sources of variation in multi-omics data sets. MOFA infers a set of (hidden) factors that capture biological and technical sources of variability. It disentangles axes of heterogeneity that are shared across multiple modalities and those specific to individual data modalities. The learnt factors enable a variety of downstream analyses, including identification of sample subgroups, data imputation and the detection of outlier samples. We applied MOFA to a cohort of 200 patient samples of chronic lymphocytic leukaemia, profiled for somatic mutations, RNA expression, DNA methylation and ex vivo drug responses. MOFA identified major dimensions of disease heterogeneity, including immunoglobulin heavy-chain variable region status, trisomy of chromosome 12 and previously underappreciated drivers, such as response to oxidative stress. In a second application, we used MOFA to analyse single-cell multi-omics data, identifying coordinated transcriptional and epigenetic changes along cell differentiation.},
- author = {Argelaguet, Ricard and Velten, Britta and Arnol, Damien and Dietrich, Sascha and Zenz, Thorsten and Marioni, John C and Buettner, Florian and Huber, Wolfgang and Stegle, Oliver},
- doi = {10.15252/msb.20178124},
- file = {:Users/ryan/Documents/Mendeley Desktop/Argelaguet et al. - 2018 - Multi‐Omics Factor Analysis—a framework for unsupervised integration of multi‐omics data sets(2).pdf:pdf},
- issn = {1744-4292},
- journal = {Molecular Systems Biology},
- keywords = {biology,data integration,dimensionality reduction,genome-scale,integrative,methods,multi-omics,personalized medicine,resources,single-cell omics,subject categories computational biology},
- month = {jun},
- number = {6},
- pages = {1--13},
- title = {{Multi-Omics Factor Analysis---a framework for unsupervised integration of multi-omics data sets}},
- url = {https://onlinelibrary.wiley.com/doi/abs/10.15252/msb.20178124},
- volume = {14},
- year = {2018}
- }
- @article{Reyes2017,
- abstract = {Most human genes have multiple transcription start and polyadenylation sites, as well as alternatively spliced exons. While transcript isoform diversity contributes to shape cellular specificity, it is currently unclear what is the balance of contributions from alternative splicing compared to alternative start and termination sites of transcription. Here, we address this question by analyzing data from the Genotype-Tissue Expression Project. We found tissue-dependent usage of exons for around one-half of expressed genes. Although tissue-dependent splicing was frequent among untranslated exons, it explained less than half of the differences in exon usage across tissues, suggesting that most of these differences were driven by alternative transcription start and termination sites. Analysis of the FANTOM Project data confirmed widespread tissue-dependent usage of alternative transcriptional start sites. Our analysis highlights alternative initiation and termination sites of transcription as the main drivers of isoform diversity across tissues. We also show that most tissue-dependent splicing is unlikely to have consequences at the proteome level.},
- author = {Reyes, Alejandro and Huber, Wolfgang},
- doi = {10.1101/127894},
- file = {:Users/ryan/Documents/Mendeley Desktop/Reyes, Huber - 2017 - Transcript Isoform Differences Across Human Tissues Are Predominantly Driven By Alternative Start And Termination.pdf:pdf},
- journal = {bioRxiv},
- pages = {1--23},
- title = {{Transcript Isoform Differences Across Human Tissues Are Predominantly Driven By Alternative Start And Termination Sites Of Transcription}},
- url = {http://biorxiv.org/content/early/2017/04/17/127894},
- year = {2017}
- }
- @article{Alexeyenko2012a,
- abstract = {BACKGROUND: Gene-set enrichment analyses (GEA or GSEA) are commonly used for biological characterization of an experimental gene-set. This is done by finding known functional categories, such as pathways or Gene Ontology terms, that are over-represented in the experimental set; the assessment is based on an overlap statistic. Rich biological information in terms of gene interaction network is now widely available, but this topological information is not used by GEA, so there is a need for methods that exploit this type of information in high-throughput data analysis.
- RESULTS: We developed a method of network enrichment analysis (NEA) that extends the overlap statistic in GEA to network links between genes in the experimental set and those in the functional categories. For the crucial step in statistical inference, we developed a fast network randomization algorithm in order to obtain the distribution of any network statistic under the null hypothesis of no association between an experimental gene-set and a functional category. We illustrate the NEA method using gene and protein expression data from a lung cancer study.
- CONCLUSIONS: The results indicate that the NEA method is more powerful than the traditional GEA, primarily because the relationships between gene sets were more strongly captured by network connectivity rather than by simple overlaps.},
- author = {Alexeyenko, Andrey and Lee, Woojoo and Pernemalm, Maria and Guegan, Justin and Dessen, Philippe and Lazar, Vladimir and Lehti{\"{o}}, Janne and Pawitan, Yudi},
- doi = {10.1186/1471-2105-13-226},
- file = {:Users/ryan/Documents/Mendeley Desktop/Alexeyenko et al. - 2012 - Network enrichment analysis extension of gene-set enrichment analysis to gene networks.pdf:pdf},
- issn = {1471-2105},
- journal = {BMC Bioinformatics},
- keywords = {Algorithms,Gene Expression,Gene Expression Profiling,Gene Expression Profiling: methods,Gene Regulatory Networks,Humans,Lung Neoplasms,Lung Neoplasms: genetics,Lung Neoplasms: metabolism,Protein Biosynthesis,Proteomics,Proteomics: methods},
- month = {jan},
- pages = {226},
- pmid = {22966941},
- title = {{Network enrichment analysis: extension of gene-set enrichment analysis to gene networks.}},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=3505158{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {13},
- year = {2012}
- }
- @book{Murphy2012,
- address = {New York, NY},
- author = {Murphy, Kenneth},
- edition = {8th},
- isbn = {978-0815342434},
- publisher = {Garland Science},
- title = {{Janeway's Immunobiology}},
- year = {2012}
- }
- @article{Zou2014,
- author = {Zou, James and Lippert, Christoph and Heckerman, David and Aryee, Martin and Listgarten, Jennifer},
- doi = {10.1038/nmeth.2815},
- file = {:Users/ryan/Documents/Mendeley Desktop/Zou et al. - 2014 - Epigenome-wide association studies without the need for cell-type composition.pdf:pdf},
- issn = {1548-7091},
- journal = {Nature Methods},
- number = {3},
- pages = {309--311},
- title = {{Epigenome-wide association studies without the need for cell-type composition}},
- url = {http://www.nature.com/doifinder/10.1038/nmeth.2815},
- volume = {11},
- year = {2014}
- }
- @article{Wang2011a,
- author = {Wang, Kevin C and Yang, Yul W and Liu, Bo and Sanyal, Amartya and Corces-Zimmerman, Ryan and Chen, Yong and Lajoie, Bryan R and Protacio, Angeline and Flynn, Ryan A and Gupta, Rajnish A and Wysocka, Joanna and Lei, Ming and Dekker, Job and Helms, Jill A and Chang, Howard Y},
- doi = {10.1038/nature09819},
- file = {:Users/ryan/Documents/Mendeley Desktop/Wang et al. - 2011 - A long noncoding RNA maintains active chromatin to coordinate homeotic gene expression.pdf:pdf},
- issn = {0028-0836},
- journal = {Nature},
- month = {apr},
- number = {7341},
- pages = {120--124},
- title = {{A long noncoding RNA maintains active chromatin to coordinate homeotic gene expression}},
- url = {http://www.nature.com/doifinder/10.1038/nature09819},
- volume = {472},
- year = {2011}
- }
- @article{Wilson2018,
- author = {Wilson, Daniel John},
- title = {{The harmonic mean p-value for combining dependent tests}},
- journal = {bioRxiv},
- year = {2018},
- pages = {171751},
- doi = {10.1101/171751},
- url = {https://www.biorxiv.org/content/early/2018/02/07/171751},
- file = {:Users/ryan/Documents/Mendeley Desktop/Wilson - 2018 - The harmonic mean p-value for combining dependent tests.pdf:pdf},
- abstract = {Analysis of `big data' frequently involves statistical comparison of millions of competing hypotheses to discover hidden processes underlying observed patterns of data, for example in the search for genetic determinants of disease in genome-wide association studies (GWAS). Controlling the family-wise error rate (FWER) is considered the strongest protection against false positives, but makes it difficult to reach the multiple testing-corrected significance threshold. Here I introduce the harmonic mean p-value (HMP) which controls the FWER while greatly improving statistical power by combining dependent tests using generalized central limit theorem. I show that the HMP easily combines information to detect statistically significant signals among groups of individually nonsignificant hypotheses in examples of a human GWAS for neuroticism and a joint human-pathogen GWAS for hepatitis C viral load. The HMP simultaneously tests all combinations of hypotheses, allowing the smallest groups of hypotheses that retain significance to be sought. The power of the HMP to detect significant hypothesis groups is greater than the power of the Benjamini-Hochberg procedure to detect significant hypotheses, even though the latter only controls the weaker false discovery rate (FDR). The HMP has broad implications for the analysis of large datasets because it enhances the potential for scientific discovery.}
- }
- @article{Dey2018,
- abstract = {Estimation of correlation matrices and correlations among variables is a ubiquitous problem in statistics. In many cases -- especially when the number of observations is small relative to the number of variables -- some kind of shrinkage or regularization is necessary to improve estimation accuracy. Here, we propose an Empirical Bayes shrinkage approach, CorShrink, which adaptively learns how much to shrink correlations by combining information across all pairs of variables. One key feature of CorShrink, which distinguishes it from most existing methods, is its flexibility in dealing with missing data. Indeed, CorShrink explicitly accounts for varying amounts of missingness among pairs of variables. Numerical studies suggest CorShrink is competitive with other popular correlation shrinkage methods, even when there is no missing data. We illustrate CorShrink on gene expression data from GTEx project, which suffers from extensive missing observations, and where existing methods struggle. We also illustrate its flexibility by applying it to estimate cosine similarities between word vectors from word2vec models, thereby generating more accurate word similarity rankings.},
- author = {Dey, Kushal K and Stephens, Matthew},
- doi = {10.1101/368316},
- file = {:Users/ryan/Documents/Mendeley Desktop/dey, Stephens - 2018 - CorShrink Empirical Bayes shrinkage estimation of correlations, with applications.pdf:pdf},
- journal = {bioRxiv},
- keywords = {correlation shrinkage,empirical bayes,genomics,missing data},
- pages = {368316},
- title = {{CorShrink: Empirical Bayes shrinkage estimation of correlations, with applications}},
- url = {https://www.biorxiv.org/content/early/2018/07/13/368316},
- year = {2018}
- }
- @article{Anders2010,
- abstract = {High-throughput sequencing assays such as RNA-Seq, ChIP-Seq or barcode counting provide quantitative readouts in the form of count data. To infer differential signal in such data correctly and with good statistical power, estimation of data variability throughout the dynamic range and a suitable error model are required. We propose a method based on the negative binomial distribution, with variance and mean linked by local regression and present an implementation, DESeq, as an R/Bioconductor package.},
- author = {Anders, Simon and Huber, Wolfgang},
- doi = {10.1186/gb-2010-11-10-r106},
- file = {:Users/ryan/Documents/Mendeley Desktop/Anders, Huber - 2010 - Differential expression analysis for sequence count data(4).pdf:pdf;:Users/ryan/Documents/Mendeley Desktop/Anders, Huber - 2010 - Differential expression analysis for sequence count data.pdf:pdf},
- issn = {1465-6914},
- journal = {Genome Biology},
- keywords = {Animals,Binomial Distribution,Chromatin Immunoprecipitation,Chromatin Immunoprecipitation: methods,Computational Biology,Computational Biology: methods,Drosophila,Drosophila: genetics,Gene Expression Profiling,Gene Expression Profiling: methods,Genetic,High-Throughput Nucleotide Sequencing,High-Throughput Nucleotide Sequencing: methods,Linear Models,Models,RNA,RNA: methods,Saccharomyces cerevisiae,Saccharomyces cerevisiae: genetics,Sequence Analysis,Stem Cells,Tissue Culture Techniques},
- month = oct,
- number = {10},
- pages = {R106},
- pmid = {20979621},
- publisher = {BioMed Central Ltd},
- title = {{Differential expression analysis for sequence count data}},
- url = {http://genomebiology.biomedcentral.com/articles/10.1186/gb-2010-11-10-r106},
- volume = {11},
- year = {2010}
- }
- @article{Ankrum2014,
- abstract = {The diverse immunomodulatory properties of mesenchymal stem/stromal cells (MSCs) may be exploited for treatment of a multitude of inflammatory conditions. MSCs have long been reported to be hypoimmunogenic or 'immune privileged'; this property is thought to enable MSC transplantation across major histocompatibility barriers and the creation of off-the-shelf therapies consisting of MSCs grown in culture. However, recent studies describing generation of antibodies against and immune rejection of allogeneic donor MSCs suggest that MSCs may not actually be immune privileged. Nevertheless, whether rejection of donor MSCs influences the efficacy of allogeneic MSC therapies is not known, and no definitive clinical advantage of autologous MSCs over allogeneic MSCs has been demonstrated to date. Although MSCs may exert therapeutic function through a brief 'hit and run' mechanism, protecting MSCs from immune detection and prolonging their persistence in vivo may improve clinical outcomes and prevent patient sensitization toward donor antigens. {\textcopyright} 2014 Nature America, Inc.},
- author = {Ankrum, James A. and Ong, Joon Faii and Karp, Jeffrey M.},
- doi = {10.1038/nbt.2816},
- file = {:Users/ryan/Documents/Mendeley Desktop/Ankrum, Ong, Karp - 2014 - Mesenchymal stem cells Immune evasive, not immune privileged.pdf:pdf},
- issn = {1546-1696},
- journal = {Nature Biotechnology},
- number = {3},
- pages = {252--260},
- title = {{Mesenchymal stem cells: Immune evasive, not immune privileged}},
- volume = {32},
- year = {2014}
- }
- @article{Zhang2008,
- abstract = {We present Model-based Analysis of ChIP-Seq data, MACS, which analyzes data generated by short read sequencers such as Solexa's Genome Analyzer. MACS empirically models the shift size of ChIP-Seq tags, and uses it to improve the spatial resolution of predicted binding sites. MACS also uses a dynamic Poisson distribution to effectively capture local biases in the genome, allowing for more robust predictions. MACS compares favorably to existing ChIP-Seq peak-finding algorithms, and is freely available.},
- author = {Zhang, Yong and Liu, Tao and Meyer, Clifford A and Eeckhoute, J{\'{e}}r{\^{o}}me and Johnson, David S and Bernstein, Bradley E and Nussbaum, Chad and Myers, Richard M and Brown, Myles and Li, Wei and Liu, X Shirley},
- doi = {10.1186/gb-2008-9-9-r137},
- file = {:Users/ryan/Documents/Mendeley Desktop/Zhang et al. - 2008 - Model-based analysis of ChIP-Seq (MACS).pdf:pdf},
- issn = {1465-6906},
- journal = {Genome Biology},
- keywords = {Algorithms,Cell Line,Chromatin Immunoprecipitation,Chromatin Immunoprecipitation: methods,Genetic,Hepatocyte Nuclear Factor 3-alpha,Hepatocyte Nuclear Factor 3-alpha: analysis,Hepatocyte Nuclear Factor 3-alpha: genetics,Humans,Models,Oligonucleotide Array Sequence Analysis,Oligonucleotide Array Sequence Analysis: methods,Tumor},
- month = sep,
- number = {9},
- pages = {R137},
- pmid = {18798982},
- title = {{Model-based Analysis of ChIP-Seq (MACS)}},
- url = {http://genomebiology.biomedcentral.com/articles/10.1186/gb-2008-9-9-r137},
- volume = {9},
- year = {2008}
- }
- @article{Holik2016,
- abstract = {Carefully designed control experiments provide a gold standard for benchmarking different genomics research tools. A shortcoming of many gene expression control studies is that replication involves profiling the same reference RNA sample multiple times. This leads to low, pure technical noise that is atypical of regular studies. To achieve a more realistic noise structure, we generated a RNA-sequencing mixture experiment using two cell lines of the same cancer type. Variability was added by extracting RNA from independent cell cultures and degrading particular samples. The systematic gene expression changes induced by this design allowed benchmarking of different library preparation kits (standard poly-A versus total RNA with Ribozero depletion) and analysis pipelines. Data generated using the total RNA kit had more signal for introns and various RNA classes (ncRNA, snRNA, snoRNA) and less variability after degradation. For differential expression analysis, voom with quality weights marginally outperformed other popular methods, while for differential splicing, DEXSeq was simultaneously the most sensitive and the most inconsistent method. For sample deconvolution analysis, DeMix outperformed IsoPure convincingly. Our RNA-sequencing data set provides a valuable resource for benchmarking different protocols and data pre-processing workflows. The extra noise mimics routine lab experiments more closely, ensuring any conclusions are widely applicable.},
- archivePrefix = {arXiv},
- arxivId = {1611.06654},
- author = {Holik, Aliaksei Z. and Law, Charity W. and Liu, Ruijie and Wang, Zeya and Wang, Wenyi and Ahn, Jaeil and Asselin-Labat, Marie-Liesse and Smyth, Gordon K. and Ritchie, Matthew E.},
- doi = {10.1093/nar/gkw1063},
- eprint = {1611.06654},
- file = {:Users/ryan/Documents/Mendeley Desktop/Holik et al. - 2016 - RNA-seq mixology designing realistic control experiments to compare protocols and analysis methods.pdf:pdf},
- isbn = {2076792171},
- issn = {1362-4962},
- journal = {Nucleic Acids Research},
- number = {5},
- pages = {e30},
- pmid = {27899618},
- title = {{RNA-seq mixology: designing realistic control experiments to compare protocols and analysis methods}},
- url = {https://doi.org/10.1093/nar/gkw1063},
- volume = {45},
- year = {2016}
- }
- @article{Hicks2016,
- author = {Hicks, Stephanie C and Okrah, Kwame and Paulson, Joseph N and Quackenbush, John and Irizarry, Rafael A and {Corrada Bravo}, Hector},
- title = {{Smooth Quantile Normalization}},
- journal = {bioRxiv},
- year = {2016},
- doi = {10.1101/085175},
- file = {:Users/ryan/Documents/Mendeley Desktop/Hicks et al. - 2016 - Smooth Quantile Normalization.pdf:pdf}
- }
- @article{Siepel2007,
- abstract = {A complete and accurate set of human protein-coding gene annotations is perhaps the single most important resource for genomic research after the human-genome sequence itself, yet the major gene catalogs remain incomplete and imperfect. Here we describe a genome-wide effort, carried out as part of the Mammalian Gene Collection (MGC) project, to identify human genes not yet in the gene catalogs. Our approach was to produce gene predictions by algorithms that rely on comparative sequence data but do not require direct cDNA evidence, then to test predicted novel genes by RT-PCR. We have identified 734 novel gene fragments (NGFs) containing 2188 exons with, at most, weak prior cDNA support. These NGFs correspond to an estimated 563 distinct genes, of which {\textgreater}160 are completely absent from the major gene catalogs, while hundreds of others represent significant extensions of known genes. The NGFs appear to be predominantly protein-coding genes rather than noncoding RNAs, unlike novel transcribed sequences identified by technologies such as tiling arrays and CAGE. They tend to be expressed at low levels and in a tissue-specific manner, and they are enriched for roles in motor activity, cell adhesion, connective tissue, and central nervous system development. Our results demonstrate that many important genes and gene fragments have been missed by traditional approaches to gene discovery but can be identified by their evolutionary signatures using comparative sequence data. However, they suggest that hundreds-not thousands-of protein-coding genes are completely missing from the current gene catalogs.},
- author = {Siepel, Adam and Diekhans, Mark and Brejov{\'{a}}, Brona and Langton, Laura and Stevens, Michael and Comstock, Charles L G and Davis, Colleen and Ewing, Brent and Oommen, Shelly and Lau, Christopher and Yu, Hung-Chun and Li, Jianfeng and Roe, Bruce A and Green, Phil and Gerhard, Daniela S and Temple, Gary and Haussler, David and Brent, Michael R},
- doi = {10.1101/gr.7128207},
- file = {:Users/ryan/Documents/Mendeley Desktop/Siepel et al. - 2007 - Targeted discovery of novel human exons by comparative genomics.pdf:pdf},
- issn = {1088-9051},
- journal = {Genome Research},
- keywords = {Animals,Base Sequence,Chickens,Chickens: genetics,Computational Biology,Exons,Exons: genetics,Expressed Sequence Tags,Genome, Human,Genomics,Humans,Mice,Predictive Value of Tests,Rats,Reverse Transcriptase Polymerase Chain Reaction,Zebrafish,Zebrafish: embryology,Zebrafish: genetics},
- month = dec,
- number = {12},
- pages = {1763--1773},
- pmid = {17989246},
- title = {{Targeted discovery of novel human exons by comparative genomics}},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=2099585{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {17},
- year = {2007}
- }
- @article{Boyle2008,
- abstract = {Mapping DNase I hypersensitive (HS) sites is an accurate method of identifying the location of genetic regulatory elements, including promoters, enhancers, silencers, insulators, and locus control regions. We employed high-throughput sequencing and whole-genome tiled array strategies to identify DNase I HS sites within human primary CD4+ T cells. Combining these two technologies, we have created a comprehensive and accurate genome-wide open chromatin map. Surprisingly, only 16{\%}-21{\%} of the identified 94,925 DNase I HS sites are found in promoters or first exons of known genes, but nearly half of the most open sites are in these regions. In conjunction with expression, motif, and chromatin immunoprecipitation data, we find evidence of cell-type-specific characteristics, including the ability to identify transcription start sites and locations of different chromatin marks utilized in these cells. In addition, and unexpectedly, our analyses have uncovered detailed features of nucleosome structure.},
- author = {Boyle, Alan P and Davis, Sean and Shulha, Hennady P and Meltzer, Paul and Margulies, Elliott H and Weng, Zhiping and Furey, Terrence S and Crawford, Gregory E},
- doi = {10.1016/j.cell.2007.12.014},
- file = {:Users/ryan/Documents/Mendeley Desktop/Boyle et al. - 2008 - High-resolution mapping and characterization of open chromatin across the genome.pdf:pdf},
- issn = {1097-4172},
- journal = {Cell},
- keywords = {Algorithms,Area Under Curve,Binding Sites,CD4-Positive T-Lymphocytes,CD4-Positive T-Lymphocytes: cytology,Cell Nucleus,Cell Nucleus: metabolism,Chromatin,Chromatin Immunoprecipitation,Chromatin: genetics,Chromosome Mapping,Chromosome Mapping: methods,Chromosomes, Human,Deoxyribonuclease I,Deoxyribonuclease I: chemistry,Deoxyribonuclease I: pharmacology,Genome, Human,Genome, Human: genetics,Genome, Human: immunology,Histones,Histones: chemistry,Humans,Nucleosomes,Nucleosomes: chemistry,Oligonucleotide Array Sequence Analysis,Promoter Regions, Genetic,ROC Curve,Sensitivity and Specificity,Sequence Analysis, DNA,Transcription Factors,Transcription Factors: metabolism},
- month = jan,
- number = {2},
- pages = {311--322},
- pmid = {18243105},
- title = {{High-resolution mapping and characterization of open chromatin across the genome}},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=2669738{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {132},
- year = {2008}
- }
- @article{Lahens2014,
- abstract = {BACKGROUND: RNA-seq is a powerful technique for identifying and quantifying transcription and splicing events, both known and novel. However, given its recent development and the proliferation of library construction methods, understanding the bias it introduces is incomplete but critical to realizing its value. RESULTS: We present a method, in vitro transcription sequencing (IVT-seq), for identifying and assessing the technical biases in RNA-seq library generation and sequencing at scale. We created a pool of over 1,000 in vitro transcribed RNAs from a full-length human cDNA library and sequenced them with polyA and total RNA-seq, the most common protocols. Because each cDNA is full length, and we show in vitro transcription is incredibly processive, each base in each transcript should be equivalently represented. However, with common RNA-seq applications and platforms, we find 50{\%} of transcripts have more than two-fold and 10{\%} have more than 10-fold differences in within-transcript sequence coverage. We also find greater than 6{\%} of transcripts have regions of dramatically unpredictable sequencing coverage between samples, confounding accurate determination of their expression. We use a combination of experimental and computational approaches to show rRNA depletion is responsible for the most significant variability in coverage, and several sequence determinants also strongly influence representation. CONCLUSIONS: These results show the utility of IVT-seq for promoting better understanding of bias introduced by RNA-seq. We find rRNA depletion is responsible for substantial, unappreciated biases in coverage introduced during library preparation. These biases suggest exon-level expression analysis may be inadvisable, and we recommend caution when interpreting RNA-seq results.},
- author = {Lahens, Nicholas F and Kavakli, Ibrahim Halil and Zhang, Ray and Hayer, Katharina and Black, Michael B and Dueck, Hannah and Pizarro, Angel and Kim, Junhyong and Irizarry, Rafael and Thomas, Russell S and Grant, Gregory R and Hogenesch, John B},
- doi = {10.1186/gb-2014-15-6-r86},
- file = {:Users/ryan/Documents/Mendeley Desktop/Lahens et al. - 2014 - IVT-seq reveals extreme bias in RNA sequencing.pdf:pdf},
- issn = {1474-760X},
- journal = {Genome Biology},
- keywords = {Animals,Artifacts,Base Composition,Base Sequence,Gene Library,Genetic,Humans,In Vitro Techniques,Inbred C57BL,Male,Mice,Nucleic Acid,RNA,Ribosomal,Ribosomal: genetics,Sequence Analysis,Sequence Homology,Transcription},
- number = {6},
- pages = {R86},
- pmid = {24981968},
- title = {{IVT-seq reveals extreme bias in RNA sequencing}},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=4197826{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {15},
- year = {2014}
- }
- @article{Nygaard2015,
- abstract = {Removal of, or adjustment for, batch effects or center differences is generally required when such effects are present in data. In particular, when preparing microarray gene expression data from multiple cohorts, array platforms, or batches for later analyses, batch effects can have confounding effects, inducing spurious differences between study groups. Many methods and tools exist for removing batch effects from data. However, when study groups are not evenly distributed across batches, actual group differences may induce apparent batch differences, in which case batch adjustments may bias, usually deflate, group differences. Some tools therefore have the option of preserving the difference between study groups, e.g. using a two-way ANOVA model to simultaneously estimate both group and batch effects. Unfortunately, this approach may systematically induce incorrect group differences in downstream analyses when groups are distributed between the batches in an unbalanced manner. The scientific community seems to be largely unaware of how this approach may lead to false discoveries.},
- author = {Nygaard, Vegard and R{\o}dland, Einar Andreas and Hovig, Eivind},
- doi = {10.1093/biostatistics/kxv027},
- file = {:Users/ryan/Documents/Mendeley Desktop/Nygaard, R{\o}dland, Hovig - 2015 - Methods that remove batch effects while retaining group differences may lead to exaggerated confide(2).pdf:pdf;:Users/ryan/Documents/Mendeley Desktop/Nygaard, R{\o}dland, Hovig - 2015 - Methods that remove batch effects while retaining group differences may lead to exaggerated confidence.pdf:pdf},
- issn = {1468-4357},
- journal = {Biostatistics},
- keywords = {Batch effects,Data normalization,Microarrays,Reproducible research},
- number = {1},
- pages = {29--39},
- pmid = {26272994},
- title = {{Methods that remove batch effects while retaining group differences may lead to exaggerated confidence in downstream analyses}},
- url = {http://biostatistics.oxfordjournals.org/lookup/doi/10.1093/biostatistics/kxv027},
- volume = {17},
- year = {2015}
- }
- @article{Nueda2007,
- abstract = {Designed microarray experiments are used to investigate the effects that controlled experimental factors have on gene expression and learn about the transcriptional responses associated with external variables. In these datasets, signals of interest coexist with varying sources of unwanted noise in a framework of (co)relation among the measured variables and with the different levels of the studied factors. Discovering experimentally relevant transcriptional changes require methodologies that take all these elements into account.},
- author = {Nueda, Mar{\'{i}}a Jos{\'{e}} and Conesa, Ana and Westerhuis, Johan A and Hoefsloot, Huub C J and Smilde, Age K and Tal{\'{o}}n, Manuel and Ferrer, Alberto},
- doi = {10.1093/bioinformatics/btm251},
- file = {:Users/ryan/Documents/Mendeley Desktop/Nueda et al. - 2007 - Discovering gene expression patterns in time course microarray experiments by ANOVA-SCA.pdf:pdf},
- issn = {1367-4811},
- journal = {Bioinformatics},
- keywords = {Algorithms,Analysis of Variance,Computational Biology,Computational Biology: methods,Computer Simulation,Data Interpretation, Statistical,Gene Expression Profiling,Gene Expression Profiling: methods,Models, Genetic,Models, Statistical,Oligonucleotide Array Sequence Analysis,Oligonucleotide Array Sequence Analysis: methods,Principal Component Analysis,Time Factors,Transcription, Genetic},
- month = jul,
- number = {14},
- pages = {1792--1800},
- pmid = {17519250},
- title = {{Discovering gene expression patterns in time course microarray experiments by ANOVA-SCA}},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/17519250},
- volume = {23},
- year = {2007}
- }
- @article{Reeb2013,
- abstract = {Validating statistical analysis methods for RNA sequencing (RNA-seq) experiments is a complex task. Researchers often find themselves having to decide between competing models or assessing the reliability of results obtained with a designated analysis program. Computer simulation has been the most frequently used procedure to verify the adequacy of a model. However, datasets generated by simulations depend on the parameterization and the assumptions of the selected model. Moreover, such datasets may constitute a partial representation of reality as the complexity or RNA-seq data is hard to mimic. We present the use of plasmode datasets to complement the evaluation of statistical models for RNA-seq data. A plasmode is a dataset obtained from experimental data but for which come truth is known. Using a set of simulated scenarios of technical and biological replicates, and public available datasets, we illustrate how to design algorithms to construct plasmodes under different experimental conditions. We contrast results from two types of methods for RNA-seq: (1) models based on negative binomial distribution (edgeR and DESeq), and (2) Gaussian models applied after transformation of data (MAANOVA). Results emphasize the fact that deciding what method to use may be experiment-specific due to the unknown distributions of expression levels. Plasmodes may contribute to choose which method to apply by using a similar pre-existing dataset. The promising results obtained from this approach, emphasize the need of promoting and improving systematic data sharing across the research community to facilitate plasmode building. Although we illustrate the use of plasmode for comparing differential expression analysis models, the flexibility of plasmode construction allows comparing upstream analysis, as normalization procedures or alignment pipelines, as well.},
- author = {Reeb, Pablo D. and Steibel, Juan P.},
- doi = {10.3389/fgene.2013.00178},
- file = {:Users/ryan/Documents/Mendeley Desktop/Reeb, Steibel - 2013 - Evaluating statistical analysis models for RNA sequencing experiments.pdf:pdf},
- issn = {1664-8021},
- journal = {Frontiers in Genetics},
- keywords = {RNA-seq,line,linear models,plasmodes,simulation,type I error},
- month = sep,
- pages = {1--9},
- title = {{Evaluating statistical analysis models for RNA sequencing experiments}},
- url = {http://www.frontiersin.org/Statistical{\_}Genetics{\_}and{\_}Methodology/10.3389/fgene.2013.00178/abstract},
- volume = {4},
- year = {2013}
- }
- @article{Wu2010a,
- abstract = {A gene set test is a differential expression analysis in which a P-value is assigned to a set of genes as a unit. Gene set tests are valuable for increasing statistical power, organizing and interpreting results and for relating expression patterns across different experiments. Existing methods are based on permutation. Methods that rely on permutation of probes unrealistically assume independence of genes, while those that rely on permutation of sample are suitable only for two-group comparisons with a good number of replicates in each group.},
- author = {Wu, Di and Lim, Elgene and Vaillant, Fran{\c{c}}ois and Asselin-Labat, Marie-Liesse and Visvader, Jane E and Smyth, Gordon K},
- doi = {10.1093/bioinformatics/btq401},
- file = {:Users/ryan/Documents/Mendeley Desktop/Wu et al. - 2010 - ROAST rotation gene set tests for complex microarray experiments.pdf:pdf},
- issn = {1367-4811},
- journal = {Bioinformatics},
- keywords = {Algorithms,Animals,Gene Expression Profiling,Gene Expression Profiling: methods,Humans,Linear Models,Mice,Oligonucleotide Array Sequence Analysis,Oligonucleotide Array Sequence Analysis: methods},
- month = sep,
- number = {17},
- pages = {2176--2182},
- pmid = {20610611},
- title = {{ROAST: rotation gene set tests for complex microarray experiments}},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=2922896{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {26},
- year = {2010}
- }
- @article{Ouyang2009,
- abstract = {Next-generation sequencing has greatly increased the scope and the resolution of transcriptional regulation study. RNA sequencing (RNA-Seq) and ChIP-Seq experiments are now generating comprehensive data on transcript abundance and on regulator-DNA interactions. We propose an approach for an integrated analysis of these data based on feature extraction of ChIP-Seq signals, principal component analysis, and regression-based component selection. Compared with traditional methods, our approach not only offers higher power in predicting gene expression from ChIP-Seq data but also provides a way to capture cooperation among regulators. In mouse embryonic stem cells (ESCs), we find that a remarkably high proportion of variation in gene expression (65{\%}) can be explained by the binding signals of 12 transcription factors (TFs). Two groups of TFs are identified. Whereas the first group (E2f1, Myc, Mycn, and Zfx) act as activators in general, the second group (Oct4, Nanog, Sox2, Smad1, Stat3, Tcfcp2l1, and Esrrb) may serve as either activator or repressor depending on the target. The two groups of TFs cooperate tightly to activate genes that are differentially up-regulated in ESCs. In the absence of binding by the first group, the binding of the second group is associated with genes that are repressed in ESCs and derepressed upon early differentiation.},
- author = {Ouyang, Zhengqing and Zhou, Qing and Wong, Wing Hung},
- doi = {10.1073/pnas.0904863106},
- file = {:Users/ryan/Documents/Mendeley Desktop/Ouyang, Zhou, Wong - 2009 - ChIP-Seq of transcription factors predicts absolute and differential gene expression in embryonic stem cells.pdf:pdf},
- issn = {1091-6490},
- journal = {Proceedings of the National Academy of Sciences of the United States of America},
- keywords = {Animals,Chromatin Immunoprecipitation,Embryonic Stem Cells,Embryonic Stem Cells: metabolism,Gene Expression,Gene Regulatory Networks,Mice,Transcription Factors,Transcription Factors: chemistry,Transcription Factors: genetics,Transcription Factors: metabolism},
- number = {51},
- pages = {21521--21526},
- pmid = {19995984},
- title = {{ChIP-Seq of transcription factors predicts absolute and differential gene expression in embryonic stem cells}},
- url = {http://www.pnas.org/content/106/51/21521.abstract},
- volume = {106},
- year = {2009}
- }
- @article{Ritchie2006,
- abstract = {Background: Assessment of array quality is an essential step in the analysis of data from microarray experiments. Once detected, less reliable arrays are typically excluded or "filtered" from further analysis to avoid misleading results. Results: In this article, a graduated approach to array quality is considered based on empirical reproducibility of the gene expression measures from replicate arrays. Weights are assigned to each microarray by fitting a heteroscedastic linear model with shared array variance terms. A novel gene-by-gene update algorithm is used to efficiently estimate the array variances. The inverse variances are used as weights in the linear model analysis to identify differentially expressed genes. The method successfully assigns lower weights to less reproducible arrays from different experiments. Down-weighting the observations from suspect arrays increases the power to detect differential expression. In smaller experiments, this approach outperforms the usual method of filtering the data. The method is available in the limma software package which is implemented in the R software environment. Conclusion: This method complements existing normalisation and spot quality procedures, and allows poorer quality arrays, which would otherwise be discarded, to be included in an analysis. It is applicable to microarray data from experiments with some level of replication. {\textcopyright} 2006 Ritchie et al; licensee BioMed Central Ltd.},
- author = {Ritchie, Matthew E. and Diyagama, Dileepa and Neilson, Jody and van Laar, Ryan and Dobrovic, Alexander and Holloway, Andrew and Smyth, Gordon K.},
- doi = {10.1186/1471-2105-7-261},
- file = {:Users/ryan/Documents/Mendeley Desktop/Ritchie et al. - 2006 - Empirical array quality weights in the analysis of microarray data.pdf:pdf},
- issn = {1471-2105},
- journal = {BMC Bioinformatics},
- pages = {261},
- title = {{Empirical array quality weights in the analysis of microarray data}},
- url = {https://bmcbioinformatics.biomedcentral.com/track/pdf/10.1186/1471-2105-7-261},
- volume = {7},
- year = {2006}
- }
- @article{Crawford2006,
- author = {Crawford, Gregory E and Holt, Ingeborg E and Whittle, James and Webb, Bryn D and Tai, Denise and Davis, Sean and Margulies, Elliott H and Chen, Yidong and Bernat, John A and Ginsburg, David and Zhou, Daixing and Luo, Shujun and Vasicek, Thomas J and Daly, Mark J and Wolfsberg, Tyra G and Collins, Francis S},
- doi = {10.1101/gr.4074106},
- file = {:Users/ryan/Documents/Mendeley Desktop/Crawford et al. - 2006 - Genome-wide mapping of DNase hypersensitive sites using massively parallel signature sequencing ( MPSS ).pdf:pdf},
- journal = {Genome Research},
- number = {1},
- pages = {123--131},
- title = {{Genome-wide mapping of DNase hypersensitive sites using massively parallel signature sequencing (MPSS)}},
- volume = {16},
- year = {2006}
- }
- @article{Daily2011,
- abstract = {A central challenge of biology is to map and understand gene regulation on a genome-wide scale. For any given genome, only a small fraction of the regulatory elements embedded in the DNA sequence have been characterized, and there is great interest in developing computational methods to systematically map all these elements and understand their relationships. Such computational efforts, however, are significantly hindered by the overwhelming size of non-coding regions and the statistical variability and complex spatial organizations of regulatory elements and interactions. Genome-wide catalogs of regulatory elements for all model species simply do not yet exist. The MotifMap system uses databases of transcription factor binding motifs, refined genome alignments, and a comparative genomic statistical approach to provide comprehensive maps of candidate regulatory elements encoded in the genomes of model species. The system is used to derive new genome-wide maps for yeast, fly, worm, mouse, and human. The human map contains 519,108 sites for 570 matrices with a False Discovery Rate of 0.1 or less. The new maps are assessed in several ways, for instance using high-throughput experimental ChIP-seq data and AUC statistics, providing strong evidence for their accuracy and coverage. The maps can be usefully integrated with many other kinds of omic data and are available at http://motifmap.igb.uci.edu/. MotifMap and its integration with other data provide a foundation for analyzing gene regulation on a genome-wide scale, and for automatically generating regulatory pathways and hypotheses. The power of this approach is demonstrated and discussed using the P53 apoptotic pathway and the Gli hedgehog pathways as examples.},
- author = {Daily, Kenneth and Patel, Vishal R and Rigor, Paul and Xie, Xiaohui and Baldi, Pierre},
- doi = {10.1186/1471-2105-12-495},
- file = {:Users/ryan/Documents/Mendeley Desktop/Daily et al. - 2011 - MotifMap integrative genome-wide maps of regulatory motif sites for model species.pdf:pdf},
- issn = {1471-2105},
- journal = {BMC Bioinformatics},
- keywords = {Algorithms,Bioinformatics,Combinatorial Libraries,Computational Biology/Bioinformatics,Computer Appl. in Life Sciences,Microarrays},
- number = {1},
- pages = {495},
- pmid = {22208852},
- title = {{MotifMap: integrative genome-wide maps of regulatory motif sites for model species}},
- url = {http://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-12-495},
- volume = {12},
- year = {2011}
- }
- @inproceedings{Sculley,
- author = {Sculley, D and Holt, Gary and Golovin, Daniel and Davydov, Eugene and Phillips, Todd and Ebner, Dietmar and Chaudhary, Vinay and Young, Michael and Crespo, Jean-Fran{\c{c}}ois and Dennison, Dan},
- booktitle = {Advances in Neural Information Processing Systems 28},
- file = {:Users/ryan/Documents/Mendeley Desktop/Sculley et al. - Unknown - Hidden Technical Debt in Machine Learning Systems.pdf:pdf},
- pages = {2503--2511},
- title = {{Hidden Technical Debt in Machine Learning Systems}},
- url = {http://papers.nips.cc/paper/5656-hidden-technical-debt-in-machine-learning-systems.pdf},
- year = {2015}
- }
- @article{Raabe2013,
- author = {Raabe, Carsten A. and Tang, Thean-Hock and Brosius, Juergen and Rozhdestvensky, Timofey S.},
- doi = {10.1093/nar/gkt1021},
- file = {:Users/ryan/Documents/Mendeley Desktop/Raabe et al. - 2013 - Biases in small RNA deep sequencing data.pdf:pdf},
- issn = {0305-1048},
- journal = {Nucleic Acids Research},
- month = {feb},
- number = {3},
- pages = {1414--1426},
- title = {{Biases in small RNA deep sequencing data}},
- url = {http://nar.oxfordjournals.org/lookup/doi/10.1093/nar/gkt1021},
- volume = {42},
- year = {2014}
- }
- @article{Pimentel2016,
- author = {Pimentel, Harold J and Bray, Nicolas and Puente, Suzette and Melsted, P{\'{a}}ll and Pachter, Lior},
- title = {{Differential analysis of RNA-Seq incorporating quantification uncertainty}},
- journal = {bioRxiv},
- year = {2016},
- pages = {058164},
- doi = {10.1101/058164},
- url = {http://biorxiv.org/content/biorxiv/early/2016/06/10/058164.full.pdf},
- abstract = {We describe a novel method for the differential analysis of RNA-Seq data that utilizes bootstrapping in conjunction with response error linear modeling to decouple biological variance from inferential variance. The method is implemented in an interactive shiny app called sleuth that utilizes kallisto quantifications and bootstraps for fast and accurate analysis of RNA-Seq experiments.},
- file = {:Users/ryan/Documents/Mendeley Desktop/Pimentel et al. - 2016 - Differential analysis of RNA-Seq incorporating quantification uncertainty.pdf:pdf}
- }
- @article{Russo2016,
- author = {Russo, Francesco and Righelli, Dario and Angelini, Claudia},
- title = {{Advancements in RNASeqGUI towards a Reproducible Analysis of RNA-Seq Experiments}},
- journal = {BioMed Research International},
- year = {2016},
- volume = {2016},
- doi = {10.1155/2016/7972351},
- issn = {23146141},
- pmid = {26977414},
- url = {http://downloads.hindawi.com/journals/bmri/2016/7972351.pdf},
- abstract = {We present the advancements and novelties recently introduced in RNASeqGUI, a graphical user interface that helps biologists to handle and analyse large data collected in RNA-Seq experiments. This work focuses on the concept of reproducible research and shows how it has been incorporated in RNASeqGUI to provide reproducible (computational) results. The novel version of RNASeqGUI combines graphical interfaces with tools for reproducible research, such as literate statistical programming, human readable report, parallel executions, caching, and interactive and web-explorable tables of results. These features allow the user to analyse big datasets in a fast, efficient, and reproducible way. Moreover, this paper represents a proof of concept, showing a simple way to develop computational tools for Life Science in the spirit of reproducible research.},
- file = {:Users/ryan/Documents/Mendeley Desktop/Russo, Righelli, Angelini - 2016 - Advancements in RNASeqGUI towards a Reproducible Analysis of RNA-Seq Experiments.pdf:pdf}
- }
- @article{Mccarthy2009,
- abstract = {MOTIVATION: Statistical methods are used to test for the differential expression of genes in microarray experiments. The most widely used methods successfully test whether the true differential expression is different from zero, but give no assurance that the differences found are large enough to be biologically meaningful. RESULTS: We present a method, t-tests relative to a threshold (TREAT), that allows researchers to test formally the hypothesis (with associated p-values) that the differential expression in a microarray experiment is greater than a given (biologically meaningful) threshold. We have evaluated the method using simulated data, a dataset from a quality control experiment for microarrays and data from a biological experiment investigating histone deacetylase inhibitors. When the magnitude of differential expression is taken into account, TREAT improves upon the false discovery rate of existing methods and identifies more biologically relevant genes. AVAILABILITY: R code implementing our methods is contributed to the software package limma available at http://www.bioconductor.org.},
- author = {McCarthy, Davis J. and Smyth, Gordon K.},
- doi = {10.1093/bioinformatics/btp053},
- file = {:Users/ryan/Documents/Mendeley Desktop/Mccarthy, Smyth - 2009 - Testing significance relative to a fold-change threshold is a TREAT.pdf:pdf},
- issn = {13674803},
- journal = {Bioinformatics},
- number = {6},
- pages = {765--771},
- pmid = {19176553},
- title = {{Testing significance relative to a fold-change threshold is a TREAT}},
- url = {http://bioinformatics.oxfordjournals.org/content/25/6/765.full.pdf},
- volume = {25},
- year = {2009}
- }
- @article{Valenzuela2017,
- author = {Valenzuela, Nicole M. and Reed, Elaine F.},
- title = {{Antibody-mediated rejection across solid organ transplants: Manifestations, mechanisms, and therapies}},
- journal = {Journal of Clinical Investigation},
- year = {2017},
- volume = {127},
- number = {7},
- pages = {2492--2504},
- doi = {10.1172/JCI90597},
- issn = {15588238},
- url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5490786/pdf/jci-127-90597.pdf},
- abstract = {Solid organ transplantation is a curative therapy for hundreds of thousands of patients with end-stage organ failure. However, long-term outcomes have not improved, and nearly half of transplant recipients will lose their allografts by 10 years after transplant. One of the major challenges facing clinical transplantation is antibody-mediated rejection (AMR) caused by anti-donor HLA antibodies. AMR is highly associated with graft loss, but unfortunately there are few efficacious therapies to prevent and reverse AMR. This Review describes the clinical and histological manifestations of AMR, and discusses the immunopathological mechanisms contributing to antibody-mediated allograft injury as well as current and emerging therapies.},
- file = {:Users/ryan/Documents/Mendeley Desktop/Valenzuela, Reed - 2017 - Antibody-mediated rejection across solid organ transplants Manifestations, mechanisms, and therapies.pdf:pdf}
- }
- @article{Langsrud2005,
- author = {Langsrud, {\O}yvind},
- doi = {10.1007/s11222-005-4789-5},
- file = {:Users/ryan/Documents/Mendeley Desktop/Langsrud - 2005 - Rotation tests.pdf:pdf},
- journal = {Statistics and Computing},
- keywords = {adjusted p-value,conditional inference,microarray data analysis,multiple endpoints,multiple testing,random orthogonal matrix,spherical distribution},
- number = {1},
- pages = {53--60},
- title = {{Rotation tests}},
- url = {http://link.springer.com/article/10.1007/s11222-005-4789-5},
- volume = {15},
- year = {2005}
- }
- @article{Legault2013,
- abstract = {MOTIVATION: Alternative splicing and other processes that allow for different transcripts to be derived from the same gene are significant forces in the eukaryotic cell. RNA-Seq is a promising technology for analyzing alternative transcripts, as it does not require prior knowledge of transcript structures or genome sequences. However, analysis of RNA-Seq data in the presence of genes with large numbers of alternative transcripts is currently challenging due to efficiency, identifiability and representation issues.
- RESULTS: We present RNA-Seq models and associated inference algorithms based on the concept of probabilistic splice graphs, which alleviate these issues. We prove that our models are often identifiable and demonstrate that our inference methods for quantification and differential processing detection are efficient and accurate.
- AVAILABILITY: Software implementing our methods is available at http://deweylab.biostat.wisc.edu/psginfer.
- CONTACT: cdewey@biostat.wisc.edu SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.},
- author = {Legault, Laura H and Dewey, Colin N},
- doi = {10.1093/bioinformatics/btt396},
- file = {:Users/ryan/Documents/Mendeley Desktop/Legault, Dewey - 2013 - Inference of alternative splicing from RNA-Seq data with probabilistic splice graphs.pdf:pdf},
- issn = {1367-4811},
- journal = {Bioinformatics},
- month = {aug},
- number = {18},
- pages = {2300--2310},
- pmid = {23846746},
- title = {{Inference of alternative splicing from RNA-Seq data with probabilistic splice graphs}},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/23846746},
- volume = {29},
- year = {2013}
- }
- @article{Arnaud2016,
- abstract = {Transcriptome studies based on quantitative sequencing can estimate levels of gene expression by measuring target RNA abundance in sequencing libraries. Sequencing costs are proportional to the total number of sequenced reads, and in order to cover rare RNAs, considerable quantities of abundant and identical reads are needed. This major limitation can be addressed by depleting a proportion of the most abundant sequences from the library. However, such depletion strategies involve either extra handling of the input RNA sample or use of a large number of reverse transcription primers, termed not-so-random (NSR) primers, which are costly to synthesize. Taking advantage of the high tolerance of reverse transcriptase to mis-prime, we found that it is possible to use as few as 40 pseudo-random (PS) reverse transcription primers to decrease the rate of undesirable abundant sequences within a library without affecting the overall transcriptome diversity. PS primers are simple to design and can be used to deplete several undesirable RNAs simultaneously, thus creating a flexible tool for enriching transcriptome libraries for rare transcript sequences.},
- author = {Arnaud, Oph{\'{e}}lie and Kato, Sachi and Poulain, St{\'{e}}phane and Plessy, Charles},
- doi = {10.2144/000114400},
- file = {:Users/ryan/Documents/Mendeley Desktop/Arnaud et al. - 2016 - Targeted reduction of highly abundant transcripts using pseudo-random primers.pdf:pdf},
- issn = {1940-9818},
- journal = {BioTechniques},
- keywords = {high-throughput sequencing,nanoCAGE,rRNA,undesirable sequences},
- month = {apr},
- number = {4},
- pages = {169--174},
- pmid = {27071605},
- title = {{Targeted reduction of highly abundant transcripts using pseudo-random primers}},
- url = {https://www.future-science.com/doi/10.2144/000114400},
- volume = {60},
- year = {2016}
- }
- @article{Blanchette2004,
- author = {Blanchette, Mathieu and Kent, W. James and Riemer, Cathy and Elnitski, Laura and Smit, Arian F. A. and Roskin, Krishna M. and Baertsch, Robert and Rosenbloom, Kate and Clawson, Hiram and Green, Eric D. and Haussler, David and Miller, Webb},
- doi = {10.1101/gr.1933104},
- file = {:Users/ryan/Documents/Mendeley Desktop/Blanchette, Kent, Riemer - 2004 - Aligning multiple genomic sequences with the threaded blockset aligner.pdf:pdf},
- journal = {Genome Research},
- number = {4},
- pages = {708--715},
- title = {{Aligning multiple genomic sequences with the threaded blockset aligner}},
- url = {http://genome.cshlp.org/content/14/4/708.short},
- volume = {14},
- year = {2004}
- }
- @article{Wu2012,
- abstract = {Competitive gene set tests are commonly used in molecular pathway analysis to test for enrichment of a particular gene annotation category amongst the differential expression results from a microarray experiment. Existing gene set tests that rely on gene permutation are shown here to be extremely sensitive to inter-gene correlation. Several data sets are analyzed to show that inter-gene correlation is non-ignorable even for experiments on homogeneous cell populations using genetically identical model organisms. A new gene set test procedure (CAMERA) is proposed based on the idea of estimating the inter-gene correlation from the data, and using it to adjust the gene set test statistic. An efficient procedure is developed for estimating the inter-gene correlation and characterizing its precision. CAMERA is shown to control the type I error rate correctly regardless of inter-gene correlations, yet retains excellent power for detecting genuine differential expression. Analysis of breast cancer data shows that CAMERA recovers known relationships between tumor subtypes in very convincing terms. CAMERA can be used to analyze specified sets or as a pathway analysis tool using a database of molecular signatures.},
- author = {Wu, Di and Smyth, Gordon K},
- doi = {10.1093/nar/gks461},
- file = {:Users/ryan/Documents/Mendeley Desktop/Wu, Smyth - 2012 - Camera a competitive gene set test accounting for inter-gene correlation.pdf:pdf},
- issn = {1362-4962},
- journal = {Nucleic Acids Research},
- month = {sep},
- number = {17},
- pages = {e133},
- pmid = {22638577},
- title = {{Camera: a competitive gene set test accounting for inter-gene correlation}},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=3458527{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {40},
- year = {2012}
- }
- @article{Sfeir2012,
- abstract = {The telomere end-protection problem is defined by the aggregate of DNA damage signaling and repair pathways that require repression at telomeres. To define the end-protection problem, we removed the whole shelterin complex from mouse telomeres through conditional deletion of TRF1 and TRF2 in nonhomologous end-joining (NHEJ) deficient cells. The data reveal two DNA damage response pathways not previously observed upon deletion of individual shelterin proteins. The shelterin-free telomeres are processed by microhomology-mediated alternative-NHEJ when Ku70/80 is absent and are attacked by nucleolytic degradation in the absence of 53BP1. The data establish that the end-protection problem is specified by six pathways [ATM (ataxia telangiectasia mutated) and ATR (ataxia telangiectasia and Rad3 related) signaling, classical-NHEJ, alt-NHEJ, homologous recombination, and resection] and show how shelterin acts with general DNA damage response factors to solve this problem.},
- author = {Sfeir, Agnel and de Lange, Titia},
- doi = {10.1126/science.1218498},
- file = {:Users/ryan/Documents/Mendeley Desktop/Sfeir, de Lange - 2012 - Removal of shelterin reveals the telomere end-protection problem.pdf:pdf},
- issn = {1095-9203},
- journal = {Science},
- keywords = {Animals,Antigens, Nuclear,Antigens, Nuclear: genetics,Antigens, Nuclear: metabolism,Cell Cycle,Cell Cycle Proteins,Cell Cycle Proteins: metabolism,Cells, Cultured,Chromosomal Proteins, Non-Histone,Chromosomal Proteins, Non-Histone: metabolism,DNA Breaks, Double-Stranded,DNA End-Joining Repair,DNA Ligases,DNA Ligases: metabolism,DNA Repair,DNA-Binding Proteins,DNA-Binding Proteins: genetics,DNA-Binding Proteins: metabolism,Homologous Recombination,Mice,Mice, Knockout,Poly(ADP-ribose) Polymerases,Poly(ADP-ribose) Polymerases: metabolism,Protein-Serine-Threonine Kinases,Protein-Serine-Threonine Kinases: metabolism,Signal Transduction,Telomere,Telomere Homeostasis,Telomere-Binding Proteins,Telomere-Binding Proteins: genetics,Telomere-Binding Proteins: metabolism,Telomere: metabolism,Telomere: ultrastructure,Telomeric Repeat Binding Protein 1,Telomeric Repeat Binding Protein 1: genetics,Telomeric Repeat Binding Protein 1: metabolism,Telomeric Repeat Binding Protein 2,Telomeric Repeat Binding Protein 2: genetics,Telomeric Repeat Binding Protein 2: metabolism,Tumor Suppressor Proteins,Tumor Suppressor Proteins: metabolism},
- month = {may},
- number = {6081},
- pages = {593--597},
- pmid = {22556254},
- title = {{Removal of shelterin reveals the telomere end-protection problem}},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/22556254},
- volume = {336},
- year = {2012}
- }
- @article{Mir2010,
- abstract = {Kinases execute pivotal cellular functions and are therefore widely investigated as potential targets in anticancer treatment. Here we analyze the kinase gene expression profiles of various tumor types and reveal the wee1 kinase to be overexpressed in glioblastomas. We demonstrate that WEE1 is a major regulator of the G(2) checkpoint in glioblastoma cells. Inhibition of WEE1 by siRNA or small molecular compound in cells exposed to DNA damaging agents results in abrogation of the G(2) arrest, premature termination of DNA repair, and cell death. Importantly, we show that the small-molecule inhibitor of WEE1 sensitizes glioblastoma to ionizing radiation in vivo. Our results suggest that inhibition of WEE1 kinase holds potential as a therapeutic approach in treatment of glioblastoma.},
- author = {Mir, Shahryar E and {De Witt Hamer}, Philip C and Krawczyk, Przemek M and Balaj, Leonora and Claes, An and Niers, Johanna M and {Van Tilborg}, Angela A G and Zwinderman, Aeilko H and Geerts, Dirk and Kaspers, Gertjan J L and Vandertop, W Peter and Cloos, Jacqueline and Tannous, Bakhos A and Wesseling, Pieter and Aten, Jacob A and Noske, David P and {Van Noorden}, Cornelis J F and W{\"{u}}rdinger, Thomas},
- doi = {10.1016/j.ccr.2010.08.011},
- file = {:Users/ryan/Documents/Mendeley Desktop/Mir et al. - 2010 - In silico analysis of kinase expression identifies WEE1 as a gatekeeper against mitotic catastrophe in glioblasto(2).pdf:pdf;:Users/ryan/Documents/Mendeley Desktop/Mir et al. - 2010 - In silico analysis of kinase expression identifies WEE1 as a gatekeeper against mitotic catastrophe in glioblastoma.pdf:pdf},
- issn = {1878-3686},
- journal = {Cancer Cell},
- keywords = {Amplified Fragment Length Polymorphism Analysis,Animal,Animals,Cell Cycle,Cell Cycle Proteins,Cell Cycle Proteins: antagonists {\&} inhibitors,Cell Cycle Proteins: biosynthesis,Cell Cycle Proteins: genetics,Cell Cycle Proteins: physiology,Cell Cycle: drug effects,Cell Cycle: genetics,DNA Damage,DNA Repair,Disease Models,G2 Phase,G2 Phase: physiology,Gene Expression Profiling,Glioblastoma,Glioblastoma: drug therapy,Glioblastoma: enzymology,Glioblastoma: genetics,Glioblastoma: pathology,Humans,Mice,Microarray Analysis,Mitosis,Mitosis: physiology,Nuclear Proteins,Nuclear Proteins: antagonists {\&} inhibitors,Nuclear Proteins: biosynthesis,Nuclear Proteins: genetics,Nuclear Proteins: physiology,Nude,Protein-Tyrosine Kinases,Protein-Tyrosine Kinases: antagonists {\&} inhibitors,Protein-Tyrosine Kinases: biosynthesis,Protein-Tyrosine Kinases: genetics,Protein-Tyrosine Kinases: physiology,Pyrimidines,Pyrimidines: pharmacology,Tumor Suppressor Protein p53,Tumor Suppressor Protein p53: genetics,Tumor Suppressor Protein p53: metabolism},
- month = {sep},
- number = {3},
- pages = {244--257},
- pmid = {20832752},
- title = {{In silico analysis of kinase expression identifies WEE1 as a gatekeeper against mitotic catastrophe in glioblastoma}},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=3115571{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {18},
- year = {2010}
- }
- @article{Yang2005,
- abstract = {A common objective of microarray experiments is the detection of differential gene expression between samples obtained under different conditions. The task of identifying differentially expressed genes consists of two aspects: ranking and selection. Numerous statistics have been proposed to rank genes in order of evidence for differential expression. However, no one statistic is universally optimal and there is seldom any basis or guidance that can direct toward a particular statistic of choice.},
- author = {Yang, Yee Hwa and Xiao, Yuanyuan and Segal, Mark R},
- doi = {10.1093/bioinformatics/bti108},
- file = {:Users/ryan/Documents/Mendeley Desktop/Yang, Xiao, Segal - 2005 - Identifying differentially expressed genes from microarray experiments via statistic synthesis.pdf:pdf},
- issn = {1367-4803},
- journal = {Bioinformatics},
- keywords = {Algorithms,Computer Simulation,Data Interpretation, Statistical,Gene Expression Profiling,Gene Expression Profiling: methods,Models, Genetic,Models, Statistical,Oligonucleotide Array Sequence Analysis,Oligonucleotide Array Sequence Analysis: methods,Sequence Analysis, DNA,Sequence Analysis, DNA: methods,Software},
- month = {apr},
- number = {7},
- pages = {1084--1093},
- pmid = {15513985},
- title = {{Identifying differentially expressed genes from microarray experiments via statistic synthesis}},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/15513985},
- volume = {21},
- year = {2005}
- }
- @article{Rozowsky2009,
- abstract = {Chromatin immunoprecipitation (ChIP) followed by tag sequencing (ChIP-seq) using high-throughput next-generation instrumentation is fast, replacing chromatin immunoprecipitation followed by genome tiling array analysis (ChIP-chip) as the preferred approach for mapping of sites of transcription-factor binding and chromatin modification. Using two deeply sequenced data sets for human RNA polymerase II and STAT1, each with matching input-DNA controls, we describe a general scoring approach to address unique challenges in ChIP-seq data analysis. Our approach is based on the observation that sites of potential binding are strongly correlated with signal peaks in the control, likely revealing features of open chromatin. We develop a two-pass strategy called PeakSeq to compensate for this. A two-pass strategy compensates for signal caused by open chromatin, as revealed by inclusion of the controls. The first pass identifies putative binding sites and compensates for genomic variation in the 'mappability' of sequences. The second pass filters out sites not significantly enriched compared to the normalized control, computing precise enrichments and significances. Our scoring procedure enables us to optimize experimental design by estimating the depth of sequencing required for a desired level of coverage and demonstrating that more than two replicates provides only a marginal gain in information.},
- author = {Rozowsky, Joel and Euskirchen, Ghia and Auerbach, Raymond K and Zhang, Zhengdong D and Gibson, Theodore and Bjornson, Robert and Carriero, Nicholas and Snyder, Michael and Gerstein, Mark B},
- doi = {10.1038/nbt.1518},
- file = {:Users/ryan/Documents/Mendeley Desktop/Rozowsky et al. - 2009 - PeakSeq enables systematic scoring of ChIP-seq experiments relative to controls.pdf:pdf},
- issn = {1546-1696},
- journal = {Nature Biotechnology},
- keywords = {Binding Sites,Biotechnology,Biotechnology: methods,Chromatin,Chromatin Immunoprecipitation,Chromatin Immunoprecipitation: methods,Chromatin: chemistry,DNA,DNA: chemistry,False Positive Reactions,Genetic Variation,Genome,Genomics,Humans,Models, Genetic,Oligonucleotide Array Sequence Analysis,RNA Polymerase II,RNA Polymerase II: chemistry,Sequence Analysis, DNA,Software},
- month = {jan},
- number = {1},
- pages = {66--75},
- pmid = {19122651},
- title = {{PeakSeq enables systematic scoring of ChIP-seq experiments relative to controls}},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=2924752{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {27},
- year = {2009}
- }
- @techreport{NuGEN2010,
- author = {{NuGEN}},
- file = {:Users/ryan/Documents/Mendeley Desktop/NuGEN - 2010 - Performance verification of the automated NuGEN Ovation Whole Blood Solution.pdf:pdf},
- institution = {NuGEN Technologies},
- title = {Performance verification of the automated {NuGEN Ovation Whole Blood Solution}},
- url = {http://www.nugeninc.com/nugen/?LinkServID=89366653-85CF-44AC-80672BBD775B0170},
- year = {2010}
- }
- @article{Raman2014a,
- author = {Raman, Indira M.},
- doi = {10.1016/j.neuron.2013.12.030},
- file = {:Users/ryan/Documents/Mendeley Desktop/Raman - 2014 - How to Be a Graduate Advisee.pdf:pdf},
- issn = {0896-6273},
- journal = {Neuron},
- month = jan,
- number = {1},
- pages = {9--11},
- publisher = {Elsevier Inc.},
- title = {How to Be a Graduate Advisee},
- url = {http://linkinghub.elsevier.com/retrieve/pii/S0896627313011914},
- volume = {81},
- year = {2014}
- }
- @article{Nettleton2006,
- author = {Nettleton, Dan and Hwang, J. T. Gene and Caldo, Rico A. and Wise, Roger P.},
- doi = {10.1198/108571106X129135},
- file = {:Users/ryan/Documents/Mendeley Desktop/Nettleton et al. - 2006 - Estimating the number of true null hypotheses from a histogram of p values.pdf:pdf},
- issn = {1085-7117},
- journal = {Journal of Agricultural, Biological, and Environmental Statistics},
- keywords = {false discovery rate,microarray data,multiple testing},
- month = sep,
- number = {3},
- pages = {337--356},
- title = {Estimating the number of true null hypotheses from a histogram of p values},
- url = {http://www.springerlink.com/index/10.1198/108571106X129135},
- volume = {11},
- year = {2006}
- }
- @misc{Clark,
- author = {Clark, Neil R and Hu, Kevin and Feldmann, Axel S and Kou, Yan and Chen, Edward Y and Duan, Qiaonan and Ma'ayan, Avi},
- file = {:Users/ryan/Documents/Mendeley Desktop/Clark et al. - 2014 - The characteristic direction a geometrical approach to identify differentially expressed genes.pdf:pdf},
- note = {Supplementary materials},
- title = {Supplementary materials for: The characteristic direction: a geometrical approach to identify differentially expressed genes},
- year = {2014}
- }
- @article{Tarca2009,
- abstract = {MOTIVATION: Gene expression class comparison studies may identify hundreds or thousands of genes as differentially expressed (DE) between sample groups. Gaining biological insight from the result of such experiments can be approached, for instance, by identifying the signaling pathways impacted by the observed changes. Most of the existing pathway analysis methods focus on either the number of DE genes observed in a given pathway (enrichment analysis methods), or on the correlation between the pathway genes and the class of the samples (functional class scoring methods). Both approaches treat the pathways as simple sets of genes, disregarding the complex gene interactions that these pathways are built to describe. RESULTS: We describe a novel signaling pathway impact analysis (SPIA) that combines the evidence obtained from the classical enrichment analysis with a novel type of evidence, which measures the actual perturbation on a given pathway under a given condition. A bootstrap procedure is used to assess the significance of the observed total pathway perturbation. Using simulations we show that the evidence derived from perturbations is independent of the pathway enrichment evidence. This allows us to calculate a global pathway significance P-value, which combines the enrichment and perturbation P-values. We illustrate the capabilities of the novel method on four real datasets. The results obtained on these data show that SPIA has better specificity and more sensitivity than several widely used pathway analysis methods. AVAILABILITY: SPIA was implemented as an R package available at http://vortex.cs.wayne.edu/ontoexpress/},
- author = {Tarca, Adi Laurentiu and Draghici, Sorin and Khatri, Purvesh and Hassan, Sonia S. and Mittal, Pooja and Kim, Jung-sun and Kim, Chong Jai and Kusanovic, Juan Pedro and Romero, Roberto},
- doi = {10.1093/bioinformatics/btn577},
- file = {:Users/ryan/Documents/Mendeley Desktop/Tarca et al. - 2009 - A novel signaling pathway impact analysis.pdf:pdf},
- issn = {1460-2059},
- journal = {Bioinformatics},
- month = jan,
- number = {1},
- pages = {75--82},
- pmid = {18990722},
- title = {A novel signaling pathway impact analysis},
- url = {https://doi.org/10.1093/bioinformatics/btn577},
- volume = {25},
- year = {2009}
- }
- @article{Tan2009,
- abstract = {Joint analysis of transcriptomic and proteomic data taken from the same samples has the potential to elucidate complex biological mechanisms. Most current methods that integrate these datasets allow for the computation of the correlation between a gene and protein but only after a one-to-one matching of genes and proteins is done. However, genes and proteins are connected via biological pathways and their relationship is not necessarily one-to-one. In this paper, we investigate the use of Correlated Factor Analysis (CFA) for modeling the correlation of genome-scale gene and protein data. Unlike existing approaches, CFA considers all possible gene-protein pairs and utilizes all gene and protein information in its modeling framework. The Generalized Singular Value Decomposition (gSVD) is another method which takes into account all available transcriptomic and proteomic data. Comparison is made between CFA and gSVD.},
- author = {Tan, Chuen Seng and Salim, Agus and Ploner, Alexander and Lehti{\"{o}}, Janne and Chia, Kee Seng and Pawitan, Yudi},
- doi = {10.1186/1471-2105-10-272},
- file = {:Users/ryan/Documents/Mendeley Desktop/Tan et al. - 2009 - Correlating gene and protein expression data using Correlated Factor Analysis.pdf:pdf},
- issn = {1471-2105},
- journal = {BMC Bioinformatics},
- keywords = {Algorithms,Computational Biology,Computational Biology: methods,Gene Expression,Gene Expression Profiling,Gene Expression Profiling: methods,Proteins,Proteins: chemistry,Proteins: genetics,Proteins: metabolism},
- month = jan,
- pages = {272},
- pmid = {19723309},
- title = {Correlating gene and protein expression data using {Correlated Factor Analysis}},
- url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2744708/},
- volume = {10},
- year = {2009}
- }
- @article{Martin2012,
- abstract = {We propose a flexible and identifiable version of the two-groups model, motivated by hierarchical Bayes considerations, that features an empirical null and a semiparametric mixture model for the non-null cases. We use a computationally efficient predictive recursion marginal likelihood procedure to estimate the model parameters, even the nonparametric mixing distribution. This leads to a nonparametric empirical Bayes testing procedure, which we call PRtest, based on thresholding the estimated local false discovery rates. Simulations and real-data examples demonstrate that, compared to existing approaches, PRtest's careful handling of the non-null density can give a much better fit in the tails of the mixture distribution which, in turn, can lead to more realistic conclusions.},
- archivePrefix = {arXiv},
- arxivId = {1106.3885},
- author = {Martin, Ryan and Tokdar, Surya T.},
- doi = {10.1093/biostatistics/kxr039},
- eprint = {1106.3885},
- file = {:Users/ryan/Documents/Mendeley Desktop/Martin, Tokdar - 2012 - A nonparametric empirical Bayes framework for large-scale multiple testing.pdf:pdf},
- issn = {1465-4644},
- journal = {Biostatistics},
- keywords = {Dirichlet process,Marginal likelihood,Mixture model,Predictive recursion,Two-groups model},
- number = {3},
- pages = {427--439},
- pmid = {22085895},
- title = {A nonparametric empirical {Bayes} framework for large-scale multiple testing},
- url = {https://doi.org/10.1093/biostatistics/kxr039},
- volume = {13},
- year = {2012}
- }
- @article{Kapourani2018b,
- author = {Kapourani, Chantriolnt-Andreas and Sanguinetti, Guido},
- doi = {10.1093/bioinformatics/bty129},
- file = {:Users/ryan/Documents/Mendeley Desktop/Kapourani, Sanguinetti - 2018 - BPRMeth a flexible Bioconductor package for modelling methylation profiles.pdf:pdf},
- issn = {1367-4803},
- journal = {Bioinformatics},
- month = may,
- pages = {1--2},
- title = {{BPRMeth}: a flexible {Bioconductor} package for modelling methylation profiles},
- url = {https://academic.oup.com/bioinformatics/advance-article/doi/10.1093/bioinformatics/bty129/4924211},
- year = {2018}
- }
- @article{Sing2005,
- author = {Sing, T. and Sander, O. and Beerenwinkel, N. and Lengauer, T.},
- doi = {10.1093/bioinformatics/bti623},
- file = {:Users/ryan/Documents/Mendeley Desktop/Sing et al. - 2005 - ROCR visualizing classifier performance in R.pdf:pdf},
- issn = {1367-4803},
- journal = {Bioinformatics},
- month = aug,
- number = {20},
- pages = {3940--3941},
- title = {{ROCR}: visualizing classifier performance in {R}},
- url = {http://bioinformatics.oxfordjournals.org/cgi/doi/10.1093/bioinformatics/bti623},
- volume = {21},
- year = {2005}
- }
- @article{Johnson2007,
- abstract = {Non-biological experimental variation or "batch effects" are commonly observed across multiple batches of microarray experiments, often rendering the task of combining data from these batches difficult. The ability to combine microarray data sets is advantageous to researchers to increase statistical power to detect biological phenomena from studies where logistical considerations restrict sample size or in studies that require the sequential hybridization of arrays. In general, it is inappropriate to combine data sets without adjusting for batch effects. Methods have been proposed to filter batch effects from data, but these are often complicated and require large batch sizes ( {\textgreater} 25) to implement. Because the majority of microarray studies are conducted using much smaller sample sizes, existing methods are not sufficient. We propose parametric and non-parametric empirical Bayes frameworks for adjusting data for batch effects that is robust to outliers in small sample sizes and performs comparable to existing methods for large samples. We illustrate our methods using two example data sets and show that our methods are justifiable, easy to apply, and useful in practice. Software for our method is freely available at: http://biosun1.harvard.edu/complab/batch/.},
- author = {Johnson, W Evan and Li, Cheng and Rabinovic, Ariel},
- doi = {10.1093/biostatistics/kxj037},
- file = {:Users/ryan/Documents/Mendeley Desktop/Johnson, Li, Rabinovic - 2007 - Adjusting batch effects in microarray expression data using empirical Bayes methods(2).pdf:pdf},
- issn = {1465-4644},
- journal = {Biostatistics},
- keywords = {Bayes Theorem,Data Interpretation,Gene Expression Profiling,Gene Expression Profiling: methods,Humans,Oligonucleotide Array Sequence Analysis,Oligonucleotide Array Sequence Analysis: methods,Statistical},
- month = jan,
- number = {1},
- pages = {118--127},
- pmid = {16632515},
- title = {Adjusting batch effects in microarray expression data using empirical {Bayes} methods},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/16632515},
- volume = {8},
- year = {2007}
- }
- @article{Ernst2012,
- abstract = {Methylation of histone H3 at lysine 4 (H3K4) is a conserved feature of active chromatin catalyzed by methyltransferases of the SET1-family (SET1A, SET1B, MLL1, MLL2, MLL3 and MLL4 in humans). These enzymes participate in diverse gene regulatory networks with a multitude of known biological functions, including direct involvement in several human disease states. Unlike most lysine methyltransferases, SET1-family enzymes are only fully active in the context of a multi-subunit complex, which includes a protein module comprised of WDR5, RbBP5, ASH2L and DPY-30 (WRAD). These proteins bind in close proximity to the catalytic SET domain of SET1-family enzymes and stimulate H3K4 methyltransferase activity. The mechanism by which WRAD promotes catalysis involves elements of allosteric control and possibly the utilization of a second H3K4 methyltransferase active site present within WRAD itself. WRAD components also engage in physical interactions that recruit SET1-family proteins to target sites on chromatin. Here, the known molecular mechanisms through which WRAD enables the function of SET1-related enzymes will be reviewed.},
- author = {Ernst, Patricia and Vakoc, Christopher R.},
- doi = {10.1093/bfgp/els017},
- file = {:Users/ryan/Documents/Mendeley Desktop/Ernst, Vakoc - 2012 - WRAD enabler of the SET1-family of H3K4 methyltransferases.pdf:pdf},
- issn = {2041-2649},
- journal = {Briefings in Functional Genomics},
- keywords = {ASH2L,DPY-30,MLL,RbBP5,SET1,WDR5},
- month = may,
- number = {3},
- pages = {217--226},
- pmid = {22652693},
- title = {{WRAD}: enabler of the {SET1-family} of {H3K4} methyltransferases},
- url = {http://bfgp.oxfordjournals.org/cgi/doi/10.1093/bfgp/els017},
- volume = {11},
- year = {2012}
- }
- @article{Storey2017,
- author = {Storey, John D},
- doi = {10.1101/241133},
- file = {:Users/ryan/Documents/Mendeley Desktop/Storey - 2017 - The Functional False Discovery Rate with Applications in Genomics.pdf:pdf},
- journal = {bioRxiv},
- pages = {1--27},
- title = {The Functional False Discovery Rate with Applications in Genomics},
- url = {https://www.biorxiv.org/content/biorxiv/early/2017/12/30/241133.full.pdf},
- year = {2017}
- }
- @article{Dutta2012a,
- abstract = {BACKGROUND: Identification of canonical pathways through enrichment of differentially expressed genes in a given pathway is a widely used method for interpreting gene lists generated from high-throughput experimental studies. However, most algorithms treat pathways as sets of genes, disregarding any inter- and intra-pathway connectivity information, and do not provide insights beyond identifying lists of pathways.
- RESULTS: We developed an algorithm (PathNet) that utilizes the connectivity information in canonical pathway descriptions to help identify study-relevant pathways and characterize non-obvious dependencies and connections among pathways using gene expression data. PathNet considers both the differential expression of genes and their pathway neighbors to strengthen the evidence that a pathway is implicated in the biological conditions characterizing the experiment. As an adjunct to this analysis, PathNet uses the connectivity of the differentially expressed genes among all pathways to score pathway contextual associations and statistically identify biological relations among pathways. In this study, we used PathNet to identify biologically relevant results in two Alzheimer's disease microarray datasets, and compared its performance with existing methods. Importantly, PathNet identified de-regulation of the ubiquitin-mediated proteolysis pathway as an important component in Alzheimer's disease progression, despite the absence of this pathway in the standard enrichment analyses.
- CONCLUSIONS: PathNet is a novel method for identifying enrichment and association between canonical pathways in the context of gene expression data. It takes into account topological information present in pathways to reveal biological information. PathNet is available as an R workspace image from http://www.bhsai.org/downloads/pathnet/.},
- author = {Dutta, Bhaskar and Wallqvist, Anders and Reifman, Jaques},
- doi = {10.1186/1751-0473-7-10},
- file = {:Users/ryan/Documents/Mendeley Desktop/Dutta, Wallqvist, Reifman - 2012 - PathNet a tool for pathway analysis using topological information.pdf:pdf},
- issn = {1751-0473},
- journal = {Source Code for Biology and Medicine},
- keywords = {canonical pathways,pathway association,pathway enrichment,pathway interaction,pathway topology},
- month = jan,
- number = {1},
- pages = {10},
- pmid = {23006764},
- title = {{PathNet}: a tool for pathway analysis using topological information},
- url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3563509/},
- volume = {7},
- year = {2012}
- }
- @article{Chung2015,
- author = {Chung, Neo Christopher and Storey, John D.},
- doi = {10.1093/bioinformatics/btu674},
- file = {:Users/ryan/Documents/Mendeley Desktop/Chung, Storey - 2015 - Statistical significance of variables driving systematic variation in high-dimensional data.pdf:pdf},
- issn = {1367-4803},
- journal = {Bioinformatics},
- number = {4},
- pages = {545--554},
- title = {Statistical significance of variables driving systematic variation in high-dimensional data},
- url = {http://bioinformatics.oxfordjournals.org/cgi/doi/10.1093/bioinformatics/btu674},
- volume = {31},
- year = {2015}
- }
- @article{Lo2011,
- abstract = {Humans are diploid, carrying two copies of each chromosome, one from each parent. Separating the paternal and maternal chromosomes is an important component of genetic analyses such as determining genetic association, inferring evolutionary scenarios, computing recombination rates, and detecting cis-regulatory events. As the pair of chromosomes are mostly identical to each other, linking together of alleles at heterozygous sites is sufficient to phase, or separate the two chromosomes. In Haplotype Assembly, the linking is done by sequenced fragments that overlap two heterozygous sites. While there has been a lot of research on correcting errors to achieve accurate haplotypes via assembly, relatively little work has been done on designing sequencing experiments to get long haplotypes. Here, we describe the different design parameters that can be adjusted with next generation and upcoming sequencing technologies, and study the impact of design choice on the length of the haplotype.},
- author = {Lo, Christine and Bashir, Ali and Bansal, Vikas and Bafna, Vineet},
- doi = {10.1186/1471-2105-12-S1-S24},
- file = {:Users/ryan/Documents/Mendeley Desktop/Lo et al. - 2011 - Strobe sequence design for haplotype assembly.pdf:pdf},
- issn = {1471-2105},
- journal = {BMC Bioinformatics},
- keywords = {Algorithms,Computational Biology,Computational Biology: methods,Genome, Human,Genomics,Genomics: methods,Haplotypes,Humans,Polymorphism, Single Nucleotide,Sequence Analysis, DNA,Sequence Analysis, DNA: methods},
- month = jan,
- number = {Suppl 1},
- pages = {S24},
- pmid = {21342554},
- publisher = {BioMed Central Ltd},
- title = {Strobe sequence design for haplotype assembly},
- url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3044279/},
- volume = {12},
- year = {2011}
- }
- @article{Togel2010,
- abstract = {Acute kidney injury (AKI) is a common clinical complication, associated with poor outcomes and the development of chronic kidney disease. Despite major advances in the understanding of its pathophysiology, available therapies for AKI are only supportive; therefore, adequate functional recovery from AKI must predominantly rely on the kidney's own reparative ability. An extensive body of preclinical data from our own and from other laboratories has shown that administration of adult multipotent marrow stromal cells (commonly referred to as mesenchymal stem cells [MSCs]), effectively ameliorates experimental AKI by exerting paracrine renoprotective effects and by stimulating tissue repair. Based on these findings, a clinical trial has been conducted to investigate the safety and efficacy of MSCs administered to open-heart surgery patients who are at high risk of postoperative AKI. In this Perspectives article, we discuss some of the early data from this trial and describe potential applications for stem cell therapies in other fields of nephrology.},
- author = {T{\"{o}}gel, Florian E and Westenfelder, Christof},
- doi = {10.1038/nrneph.2009.229},
- file = {:Users/ryan/Documents/Mendeley Desktop/T{\"{o}}gel, Westenfelder - 2010 - Mesenchymal stem cells a new therapeutic tool for AKI.pdf:pdf},
- issn = {1759-507X},
- journal = {Nature Reviews Nephrology},
- keywords = {Acute Kidney Injury,Acute Kidney Injury: etiology,Acute Kidney Injury: mortality,Acute Kidney Injury: therapy,Animals,Clinical Trials, Phase I as Topic,Coronary Artery Bypass,Coronary Artery Bypass: adverse effects,Coronary Artery Bypass: methods,Coronary Disease,Coronary Disease: diagnosis,Coronary Disease: therapy,Disease Models, Animal,Female,Follow-Up Studies,Graft Rejection,Graft Survival,Humans,Kidney Failure, Chronic,Kidney Failure, Chronic: etiology,Kidney Failure, Chronic: physiopathology,Kidney Failure, Chronic: prevention {\&} control,Male,Mesenchymal Stem Cell Transplantation,Mesenchymal Stem Cell Transplantation: adverse effects,Mesenchymal Stem Cell Transplantation: methods,Mice,Rats,Risk Assessment,Survival Rate,Transplantation, Autologous,Treatment Outcome,acute-kidney-injury,cyno-project},
- mendeley-tags = {acute-kidney-injury,cyno-project},
- month = mar,
- number = {3},
- pages = {179--183},
- pmid = {20186233},
- title = {Mesenchymal stem cells: a new therapeutic tool for {AKI}},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/20186233},
- volume = {6},
- year = {2010}
- }
- @article{Troyanskaya2001,
- abstract = {MOTIVATION: Gene expression microarray experiments can generate data sets with multiple missing expression values. Unfortunately, many algorithms for gene expression analysis require a complete matrix of gene array values as input. For example, methods such as hierarchical clustering and K-means clustering are not robust to missing data, and may lose effectiveness even with a few missing values. Methods for imputing missing data are needed, therefore, to minimize the effect of incomplete data sets on analyses, and to increase the range of data sets to which these algorithms can be applied. In this report, we investigate automated methods for estimating missing data. RESULTS: We present a comparative study of several methods for the estimation of missing values in gene microarray data. We implemented and evaluated three methods: a Singular Value Decomposition (SVD) based method (SVDimpute), weighted K-nearest neighbors (KNNimpute), and row average. We evaluated the methods using a variety of parameter settings and over different real data sets, and assessed the robustness of the imputation methods to the amount of missing data over the range of 1--20{\%} missing values. We show that KNNimpute appears to provide a more robust and sensitive method for missing value estimation than SVDimpute, and both SVDimpute and KNNimpute surpass the commonly used row average method (as well as filling missing values with zeros). We report results of the comparative experiments and provide recommendations and tools for accurate estimation of missing microarray data under a variety of conditions.},
- author = {Troyanskaya, O and Cantor, M and Sherlock, G and Brown, P and Hastie, T and Tibshirani, R and Botstein, D and Altman, R B},
- file = {:Users/ryan/Documents/Mendeley Desktop/Troyanskaya et al. - 2001 - Missing value estimation methods for DNA microarrays.pdf:pdf},
- issn = {1367-4803},
- journal = {Bioinformatics},
- keywords = {*Algorithms,*Data Interpretation, Statistical,*Mathematical Computing,*Oligonucleotide Array Sequence Analysis statistic,Cell Cycle genetics,Cluster Analysis,Comparative Study,Data Display,Gene Expression,Multigene Family,Saccharomyces cerevisiae genetics,Sensitivity and Specificity,Software,Support, Non U.S. Gov't,Support, U.S. Gov't, Non P.H.S.,Support, U.S. Gov't, P.H.S.},
- month = jun,
- number = {6},
- pages = {520--525},
- title = {Missing value estimation methods for {DNA} microarrays},
- volume = {17},
- year = {2001}
- }
- @article{Rapaport2013,
- author = {Rapaport, Franck and Khanin, Raya and Liang, Yupu and Pirun, Mono and Krek, Azra and Zumbo, Paul and Mason, Christopher E and Socci, Nicholas D and Betel, Doron},
- doi = {10.1186/gb-2013-14-9-r95},
- file = {:Users/ryan/Documents/Mendeley Desktop/Rapaport et al. - 2013 - Comprehensive evaluation of differential gene expression analysis methods for RNA-seq data.pdf:pdf},
- issn = {1465-6906},
- journal = {Genome Biology},
- number = {9},
- pages = {R95},
- title = {Comprehensive evaluation of differential gene expression analysis methods for {RNA-seq} data},
- url = {http://genomebiology.com/2013/14/9/R95},
- volume = {14},
- year = {2013}
- }
- @article{Moradkhani2009,
- abstract = {The human alpha-globin genes are paralogues, sharing a high degree of DNA sequence similarity and producing an identical alpha-globin chain. Over half of the alpha-globin structural variants reported to date are only characterized at the amino acid level. It is likely that a fraction of these variants, with phenotypes differing from one observation to another, may be due to the same mutation but on a different alpha-globin gene. There have been very few previous examples of hemoglobin variants that can be found at both HBA1 and HBA2 genes. Here, we report the results of a systematic multicenter study in a large multiethnic population to identify such variants and to analyze their differences from a functional and evolutionary perspective. We identified 14 different Hb variants resulting from identical mutations on either one of the two human alpha-globin paralogue genes. We also showed that the average percentage of hemoglobin variants due to a HBA2 gene mutation (alpha2) is higher than the percentage of hemoglobin variants due to the same HBA1 gene mutation (alpha1) and that the alpha2/alpha1 ratio varied between variants. These alpha-globin chain variants have most likely occurred via recurrent mutations, gene conversion events, or both. Based on these data, we propose a nomenclature for hemoglobin variants that fall into this category.},
- author = {Moradkhani, Kamran and Pr{\'{e}}hu, Claude and Old, John and Henderson, Shirley and Balamitsa, Vera and Luo, Hong Yuan and Poon, Man Chiu and Chui, David H K and Wajcman, Henri and Patrinos, George P.},
- doi = {10.1007/s00277-008-0624-3},
- file = {:Users/ryan/Documents/Mendeley Desktop/Moradkhani et al. - 2009 - Mutations in the paralogous human $\alpha$-globin genes yielding identical hemoglobin variants.pdf:pdf},
- issn = {0939-5555},
- journal = {Annals of Hematology},
- keywords = {Gene conversion,Hemoglobin variants,Mutations,Paralogues,$\alpha$-Globin genes},
- number = {6},
- pages = {535--543},
- pmid = {18923834},
- title = {Mutations in the paralogous human {$\alpha$-globin} genes yielding identical hemoglobin variants},
- volume = {88},
- year = {2009}
- }
- @article{Churchill2002,
- abstract = {Microarray technology is now widely available and is being applied to address increasingly complex scientific questions. Consequently, there is a greater demand for statistical assessment of the conclusions drawn from microarray experiments. This review discusses fundamental issues of how to design an experiment to ensure that the resulting data are amenable to statistical analysis. The discussion focuses on two-color spotted cDNA microarrays, but many of the same issues apply to single-color gene-expression assays as well.},
- author = {Churchill, Gary A.},
- doi = {10.1038/ng1031},
- file = {:Users/ryan/Documents/Mendeley Desktop/Churchill - 2002 - Fundamentals of experimental design for cDNA microarrays.pdf:pdf},
- issn = {1061-4036},
- journal = {Nature Genetics},
- keywords = {Animals,DNA, Complementary,DNA, Complementary: analysis,Gene Expression,Gene Expression Profiling,Gene Expression Profiling: methods,Mice,Models, Biological,Oligonucleotide Array Sequence Analysis,Oligonucleotide Array Sequence Analysis: methods,Reference Standards,Reproducibility of Results,Research Design,Statistics as Topic},
- month = dec,
- number = {Suppl},
- pages = {490--495},
- pmid = {12454643},
- title = {Fundamentals of experimental design for {cDNA} microarrays},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/12454643},
- volume = {32},
- year = {2002}
- }
- @unpublished{Lun2015,
- author = {Lun, Aaron T L and Chen, Yunshun and Smyth, Gordon K},
- file = {:Users/ryan/Documents/Mendeley Desktop/Lun, Chen - 2015 - It ' s DE-licious a recipe for differential expression analyses of RNA-seq experiments using quasi-likelihood meth.pdf:pdf},
- month = apr,
- note = {Preprint},
- pages = {1--23},
- title = {It's {DE-licious}: a recipe for differential expression analyses of {RNA-seq} experiments using quasi-likelihood methods in {edgeR}},
- url = {http://www.statsci.org/smyth/pubs/QLedgeRPreprint.pdf},
- volume = {1418},
- year = {2015}
- }
- @article{Anders2013a,
- abstract = {RNA sequencing (RNA-seq) has been rapidly adopted for the profiling of transcriptomes in many areas of biology, including studies into gene regulation, development and disease. Of particular interest is the discovery of differentially expressed genes across different conditions (e.g., tissues, perturbations) while optionally adjusting for other systematic factors that affect the data-collection process. There are a number of subtle yet crucial aspects of these analyses, such as read counting, appropriate treatment of biological variability, quality control checks and appropriate setup of statistical modeling. Several variations have been presented in the literature, and there is a need for guidance on current best practices. This protocol presents a state-of-the-art computational and statistical RNA-seq differential expression analysis workflow largely based on the free open-source R language and Bioconductor software and, in particular, on two widely used tools, DESeq and edgeR. Hands-on time for typical small experiments (e.g., 4-10 samples) can be {\textless}1 h, with computation time {\textless}1 d using a standard desktop PC.},
- author = {Anders, Simon and McCarthy, Davis J and Chen, Yunshun and Okoniewski, Michal and Smyth, Gordon K and Huber, Wolfgang and Robinson, Mark D},
- doi = {10.1038/nprot.2013.099},
- file = {:Users/ryan/Documents/Mendeley Desktop/Anders et al. - 2013 - Count-based differential expression analysis of RNA sequencing data using R and Bioconductor.pdf:pdf},
- issn = {1750-2799},
- journal = {Nature Protocols},
- keywords = {Base Sequence,Computational Biology,Computational Biology: methods,Gene Expression Profiling,Gene Expression Profiling: methods,Sequence Analysis, RNA,Sequence Analysis, RNA: methods,Software,Workflow},
- month = sep,
- number = {9},
- pages = {1765--1786},
- pmid = {23975260},
- title = {Count-based differential expression analysis of {RNA} sequencing data using {R} and {Bioconductor}},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/23975260},
- volume = {8},
- year = {2013}
- }
- @article{Yaari2013,
- abstract = {Enrichment analysis of gene sets is a popular approach that provides a functional interpretation of genome-wide expression data. Existing tests are affected by inter-gene correlations, resulting in a high Type I error. The most widely used test, Gene Set Enrichment Analysis, relies on computationally intensive permutations of sample labels to generate a null distribution that preserves gene-gene correlations. A more recent approach, CAMERA, attempts to correct for these correlations by estimating a variance inflation factor directly from the data. Although these methods generate P-values for detecting gene set activity, they are unable to produce confidence intervals or allow for post hoc comparisons. We have developed a new computational framework for Quantitative Set Analysis of Gene Expression (QuSAGE). QuSAGE accounts for inter-gene correlations, improves the estimation of the variance inflation factor and, rather than evaluating the deviation from a null hypothesis with a P-value, it quantifies gene-set activity with a complete probability density function. From this probability density function, P-values and confidence intervals can be extracted and post hoc analysis can be carried out while maintaining statistical traceability. Compared with Gene Set Enrichment Analysis and CAMERA, QuSAGE exhibits better sensitivity and specificity on real data profiling the response to interferon therapy (in chronic Hepatitis C virus patients) and Influenza A virus infection. QuSAGE is available as an R package, which includes the core functions for the method as well as functions to plot and visualize the results.},
- author = {Yaari, Gur and Bolen, Christopher R and Thakar, Juilee and Kleinstein, Steven H},
- doi = {10.1093/nar/gkt660},
- file = {:Users/ryan/Documents/Mendeley Desktop/Yaari et al. - 2013 - Quantitative set analysis for gene expression a method to quantify gene set differential expression including gene.pdf:pdf},
- issn = {1362-4962},
- journal = {Nucleic Acids Research},
- keywords = {Confidence Intervals,Data Interpretation, Statistical,Gene Expression Profiling,Gene Expression Profiling: methods,Genes,Humans,Influenza, Human,Influenza, Human: genetics,Influenza, Human: metabolism},
- month = oct,
- number = {18},
- pages = {e170},
- pmid = {23921631},
- title = {Quantitative set analysis for gene expression: a method to quantify gene set differential expression including gene-gene correlations},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=3794608{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {41},
- year = {2013}
- }
- @article{Phipson2016,
- abstract = {One of the most common analysis tasks in genomic research is to identify genes that are differentially expressed (DE) between experimental conditions. Empirical Bayes (EB) statistical tests using moderated genewise variances have been very effective for this purpose, especially when the number of biological replicate samples is small. The EB procedures can however be heavily influenced by a small number of genes with very large or very small variances. This article improves the differential expression tests by robustifying the hyperparameter estimation procedure. The robust procedure has the effect of decreasing the informativeness of the prior distribution for outlier genes while increasing its informativeness for other genes. This effect has the double benefit of reducing the chance that hypervariable genes will be spuriously identified as DE while increasing statistical power for the main body of genes. The robust EB algorithm is fast and numerically stable. The procedure allows exact small-sample null distributions for the test statistics and reduces exactly to the original EB procedure when no outlier genes are present. Simulations show that the robustified tests have similar performance to the original tests in the absence of outlier genes but have greater power and robustness when outliers are present. The article includes case studies for which the robust method correctly identifies and downweights genes associated with hidden covariates and detects more genes likely to be scientifically relevant to the experimental conditions. The new procedure is implemented in the limma software package freely available from the Bioconductor repository.},
- author = {Phipson, Belinda and Lee, Stanley and Majewski, Ian J. and Alexander, Warren S. and Smyth, Gordon K.},
- doi = {10.1214/16-AOAS920},
- file = {:Users/ryan/Documents/Mendeley Desktop/Phipson et al. - 2016 - Robust hyperparameter estimation protects against hypervariable genes and improves power to detect differenti(2).pdf:pdf},
- issn = {1932-6157},
- journal = {The Annals of Applied Statistics},
- keywords = {Empirical Bayes,Gene expression,Microarrays,Outliers,Robustness},
- month = jun,
- number = {2},
- pages = {946--963},
- title = {Robust hyperparameter estimation protects against hypervariable genes and improves power to detect differential expression},
- url = {http://projecteuclid.org/euclid.aoas/1469199900},
- volume = {10},
- year = {2016}
- }
- @article{Teng2016a,
- abstract = {Obtaining RNA-seq measurements involves a complex data analytical process with a large number of competing algorithms as options. There is much debate about which of these methods provides the best approach. Unfortunately, it is currently difficult to evaluate their performance due in part to a lack of sensitive assessment metrics. We present a series of statistical summaries and plots to evaluate the performance in terms of specificity and sensitivity, available as a R/Bioconductor package ( http://bioconductor.org/packages/rnaseqcomp ). Using two independent datasets, we assessed seven competing pipelines. Performance was generally poor, with two methods clearly underperforming and RSEM slightly outperforming the rest.},
- author = {Teng, Mingxiang and Love, Michael I. and Davis, Carrie A. and Djebali, Sarah and Dobin, Alexander and Graveley, Brenton R. and Li, Sheng and Mason, Christopher E. and Olson, Sara and Pervouchine, Dmitri and Sloan, Cricket A. and Wei, Xintao and Zhan, Lijun and Irizarry, Rafael A.},
- doi = {10.1186/s13059-016-0940-1},
- file = {:Users/ryan/Documents/Mendeley Desktop/Teng et al. - 2016 - A benchmark for RNA-seq quantification pipelines.pdf:pdf},
- issn = {1474-760X},
- journal = {Genome Biology},
- number = {1},
- pages = {74},
- pmid = {27107712},
- publisher = {Genome Biology},
- title = {A benchmark for {RNA-seq} quantification pipelines},
- url = {http://genomebiology.biomedcentral.com/articles/10.1186/s13059-016-0940-1},
- volume = {17},
- year = {2016}
- }
- @article{McLean2010,
- abstract = {We developed the Genomic Regions Enrichment of Annotations Tool (GREAT) to analyze the functional significance of cis-regulatory regions identified by localized measurements of DNA binding events across an entire genome. Whereas previous methods took into account only binding proximal to genes, GREAT is able to properly incorporate distal binding sites and control for false positives using a binomial test over the input genomic regions. GREAT incorporates annotations from 20 ontologies and is available as a web application. Applying GREAT to data sets from chromatin immunoprecipitation coupled with massively parallel sequencing (ChIP-seq) of multiple transcription-associated factors, including SRF, NRSF, GABP, Stat3 and p300 in different developmental contexts, we recover many functions of these factors that are missed by existing gene-based tools, and we generate testable hypotheses. The utility of GREAT is not limited to ChIP-seq, as it could also be applied to open chromatin, localized epigenomic markers and similar functional data sets, as well as comparative genomics sets.},
- author = {McLean, Cory Y. and Bristor, Dave and Hiller, Michael and Clarke, Shoa L. and Schaar, Bruce T. and Lowe, Craig B. and Wenger, Aaron M. and Bejerano, Gill},
- doi = {10.1038/nbt.1630},
- file = {:Users/ryan/Documents/Mendeley Desktop/McLean et al. - 2010 - GREAT improves functional interpretation of cis-regulatory regions.pdf:pdf},
- issn = {1087-0156},
- journal = {Nature Biotechnology},
- number = {5},
- pages = {495--501},
- pmid = {20436461},
- publisher = {Nature Publishing Group},
- title = {{GREAT} improves functional interpretation of cis-regulatory regions},
- url = {http://bejerano.stanford.edu/papers/GREAT.pdf},
- volume = {28},
- year = {2010}
- }
- @article{Kechris2010,
- abstract = {High density tiling arrays are an effective strategy for genome-wide identification of transcription factor binding regions. Sliding window methods that calculate moving averages of log ratios or t-statistics have been useful for the analysis of tiling array data. Here, we present a method that generalizes the moving average approach to evaluate sliding windows of p-values by using combined p-value statistics. In particular, the combined p-value framework can be useful in situations when taking averages of the corresponding test-statistic for the hypothesis may not be appropriate or when it is difficult to assess the significance of these averages. We exhibit the strengths of the combined p-values methods on Drosophila tiling array data and assess their ability to predict genomic regions enriched for transcription factor binding. The predictions are evaluated based on their proximity to target genes and their enrichment of known transcription factor binding sites. We also present an application for the generalization of the moving average based on integrating two different tiling array experiments.},
- author = {Kechris, Katerina J and Biehs, Brian and Kornberg, Thomas B},
- doi = {10.2202/1544-6115.1434},
- file = {:Users/ryan/Documents/Mendeley Desktop/Kechris, Biehs, Kornberg - 2010 - Generalizing moving averages for tiling arrays using combined p-value statistics.pdf:pdf},
- issn = {1544-6115},
- journal = {Statistical Applications in Genetics and Molecular Biology},
- number = {1},
- pages = {Article 29},
- pmid = {20812907},
- title = {Generalizing moving averages for tiling arrays using combined p-value statistics},
- url = {http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2942027/pdf/sagmb1434.pdf},
- volume = {9},
- year = {2010}
- }
- @article{Tan2017,
- author = {Tan, Jie and Huyck, Matthew and Hu, Dongbo and Zelaya, Ren{\'{e}} A and Hogan, Deborah A and Greene, Casey S},
- doi = {10.1101/156620},
- file = {:Users/ryan/Documents/Mendeley Desktop/Tan et al. - 2017 - ADAGE signature analysis differential expression analysis with data-defined gene sets.pdf:pdf},
- journal = {bioRxiv},
- title = {{ADAGE} signature analysis: differential expression analysis with data-defined gene sets},
- url = {http://www.biorxiv.org/content/biorxiv/early/2017/06/27/156620.full.pdf},
- year = {2017}
- }
- @article{McGill1978,
- abstract = {Box plots display batches of data. Five values from a set of data are conventionally used; the extremes, the upper and lower hinges (quartiles), and the median. Such plots are becoming a widely used tool in exploratory data analysis and in preparing visual summaries for statisticians and nonstatisticians alike. Three variants of the basic display, devised by the authors, are described. The first visually incorporates a measure of group size; the second incorporates an indication of rough significance of differences between medians; the third combines the features of the first two. These techniques are displayed by examples.},
- author = {McGill, Robert and Tukey, John W and Larsen, Wayne A},
- doi = {10.2307/2683468},
- issn = {0003-1305},
- journal = {The American Statistician},
- number = {1},
- pages = {12--16},
- publisher = {American Statistical Association, Taylor {\&} Francis, Ltd.},
- title = {Variations of Box Plots},
- url = {http://www.jstor.org/stable/2683468},
- volume = {32},
- year = {1978}
- }
- @article{Schroder2004,
- abstract = {Interferon-gamma (IFN-gamma) coordinates a diverse array of cellular programs through transcriptional regulation of immunologically relevant genes. This article reviews the current understanding of IFN-gamma ligand, receptor, signal transduction, and cellular effects with a focus on macrophage responses and to a lesser extent, responses from other cell types that influence macrophage function during infection. The current model for IFN-gamma signal transduction is discussed, as well as signal regulation and factors conferring signal specificity. Cellular effects of IFN-gamma are described, including up-regulation of pathogen recognition, antigen processing and presentation, the antiviral state, inhibition of cellular proliferation and effects on apoptosis, activation of microbicidal effector functions, immunomodulation, and leukocyte trafficking. In addition, integration of signaling and response with other cytokines and pathogen-associated molecular patterns, such as tumor necrosis factor-alpha, interleukin-4, type I IFNs, and lipopolysaccharide are discussed.},
- author = {Schroder, Kate and Hertzog, Paul J and Ravasi, Timothy and Hume, David A},
- doi = {10.1189/jlb.0603252},
- file = {:Users/ryan/Documents/Mendeley Desktop/Schroder et al. - 2004 - Interferon-gamma an overview of signals, mechanisms and functions.pdf:pdf},
- issn = {0741-5400},
- journal = {Journal of Leukocyte Biology},
- keywords = {Animals,Gene Expression Regulation,Gene Expression Regulation: immunology,Humans,Inflammation,Inflammation: immunology,Interferon-gamma,Interferon-gamma: immunology,Interferon-gamma: physiology,Macrophages,Macrophages: immunology,Receptor Cross-Talk,Receptor Cross-Talk: immunology,Signal Transduction,Signal Transduction: immunology},
- month = feb,
- number = {2},
- pages = {163--189},
- pmid = {14525967},
- title = {Interferon-gamma: an overview of signals, mechanisms and functions},
- url = {http://www.jleukbio.org/content/75/2/163.short},
- volume = {75},
- year = {2004}
- }
- @article{Ingolia2011,
- abstract = {The ability to sequence genomes has far outstripped approaches for deciphering the information they encode. Here we present a suite of techniques, based on ribosome profiling (the deep sequencing of ribosome-protected mRNA fragments), to provide genome-wide maps of protein synthesis as well as a pulse-chase strategy for determining rates of translation elongation. We exploit the propensity of harringtonine to cause ribosomes to accumulate at sites of translation initiation together with a machine learning algorithm to define protein products systematically. Analysis of translation in mouse embryonic stem cells reveals thousands of strong pause sites and unannotated translation products. These include amino-terminal extensions and truncations and upstream open reading frames with regulatory potential, initiated at both AUG and non-AUG codons, whose translation changes after differentiation. We also define a class of short, polycistronic ribosome-associated coding RNAs (sprcRNAs) that encode small proteins. Our studies reveal an unanticipated complexity to mammalian proteomes.},
- author = {Ingolia, Nicholas T and Lareau, Liana F and Weissman, Jonathan S},
- doi = {10.1016/j.cell.2011.10.002},
- file = {:Users/ryan/Documents/Mendeley Desktop/Ingolia, Lareau, Weissman - 2011 - Ribosome profiling of mouse embryonic stem cells reveals the complexity and dynamics of mammalian pro.pdf:pdf},
- issn = {1097-4172},
- journal = {Cell},
- keywords = {Algorithms,Animals,Artificial Intelligence,Embryoid Bodies,Embryoid Bodies: cytology,Embryoid Bodies: metabolism,Embryonic Stem Cells,Embryonic Stem Cells: metabolism,Genomics,Genomics: methods,Harringtonines,Harringtonines: pharmacology,High-Throughput Nucleotide Sequencing,High-Throughput Nucleotide Sequencing: methods,Kinetics,Mice,Open Reading Frames,Peptide Chain Initiation, Translational,Protein Biosynthesis,RNA,RNA: analysis,Ribosomes,Ribosomes: chemistry,Ribosomes: drug effects,Sequence Analysis, RNA,Sequence Analysis, RNA: methods},
- month = nov,
- number = {4},
- pages = {789--802},
- pmid = {22056041},
- publisher = {Elsevier Inc.},
- title = {Ribosome profiling of mouse embryonic stem cells reveals the complexity and dynamics of mammalian proteomes},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=3225288{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {147},
- year = {2011}
- }
- @article{Barski2007,
- abstract = {Histone modifications are implicated in influencing gene expression. We have generated high-resolution maps for the genome-wide distribution of 20 histone lysine and arginine methylations as well as histone variant H2A.Z, RNA polymerase II, and the insulator binding protein CTCF across the human genome using the Solexa 1G sequencing technology. Typical patterns of histone methylations exhibited at promoters, insulators, enhancers, and transcribed regions are identified. The monomethylations of H3K27, H3K9, H4K20, H3K79, and H2BK5 are all linked to gene activation, whereas trimethylations of H3K27, H3K9, and H3K79 are linked to repression. H2A.Z associates with functional regulatory elements, and CTCF marks boundaries of histone methylation domains. Chromosome banding patterns are correlated with unique patterns of histone modifications. Chromosome breakpoints detected in T cell cancers frequently reside in chromatin regions associated with H3K4 methylations. Our data provide new insights into the function of histone methylation and chromatin organization in genome function.},
- author = {Barski, Artem and Cuddapah, Suresh and Cui, Kairong and Roh, Tae-Young and Schones, Dustin E and Wang, Zhibin and Wei, Gang and Chepelev, Iouri and Zhao, Keji},
- doi = {10.1016/j.cell.2007.05.009},
- file = {:Users/ryan/Documents/Mendeley Desktop/Barski et al. - 2007 - High-resolution profiling of histone methylations in the human genome.pdf:pdf},
- issn = {0092-8674},
- journal = {Cell},
- keywords = {Chromatin,Chromatin: genetics,Chromatin: ultrastructure,Chromosome Breakage,Enhancer Elements, Genetic,Enhancer Elements, Genetic: genetics,Epigenesis, Genetic,Epigenesis, Genetic: genetics,Gene Expression Profiling,Gene Expression Profiling: methods,Gene Expression Regulation,Gene Expression Regulation: genetics,Genome, Human,Genome, Human: genetics,Histone-Lysine N-Methyltransferase,Histone-Lysine N-Methyltransferase: metabolism,Histones,Histones: genetics,Histones: metabolism,Humans,Lymphoma,Lymphoma: genetics,Methylation,Promoter Regions, Genetic,Promoter Regions, Genetic: genetics,Protein Methyltransferases,RNA Polymerase II,RNA Polymerase II: metabolism,Regulatory Elements, Transcriptional,Regulatory Elements, Transcriptional: genetics,Transcriptional Activation,Transcriptional Activation: genetics},
- month = may,
- number = {4},
- pages = {823--837},
- pmid = {17512414},
- title = {High-resolution profiling of histone methylations in the human genome},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/17512414},
- volume = {129},
- year = {2007}
- }
- @article{Kapourani2018,
- abstract = {Measurements of DNA methylation at the single cell level are promising to revolutionise our understanding of epigenetic control of gene expression. Yet, intrinsic limitations of the technology result in very sparse coverage of CpG sites (around 5{\%} to 20{\%} coverage), effectively limiting the analysis repertoire to a semi-quantitative level. Here we introduce Melissa (MEthyLation Inference for Single cell Analysis), a Bayesian hierarchical method to quantify spatially-varying methylation profiles across genomic regions from single-cell bisulfite sequencing data (scBS-seq). Melissa clusters individual cells based on local methylation patterns, enabling the discovery of epigenetic differences and similarities among individual cells. The clustering also acts as an effective regularisation method for imputation of methylation on unassayed CpG sites, enabling transfer of information between individual cells. We show both on simulated and real data sets that Melissa provides accurate and biologically meaningful clusterings, and state-of-the-art imputation performance. An R implementation of Melissa is publicly available at https://github.com/andreaskapou/Melissa.},
- author = {Kapourani, Chantriolnt-Andreas and Sanguinetti, Guido},
- doi = {10.1101/312025},
- file = {:Users/ryan/Documents/Mendeley Desktop/Kapourani, Sanguinetti - 2018 - Melissa Bayesian clustering and imputation of single cell methylomes(2).pdf:pdf},
- journal = {bioRxiv},
- pages = {1--16},
- title = {Melissa: {Bayesian} clustering and imputation of single cell methylomes},
- url = {http://biorxiv.org/content/early/2018/05/01/312025.abstract},
- year = {2018}
- }
- @article{Tibshirani2002,
- abstract = {We have devised an approach to cancer class prediction from gene expression profiling, based on an enhancement of the simple nearest prototype (centroid) classifier. We shrink the prototypes and hence obtain a classifier that is often more accurate than competing methods. Our method of "nearest shrunken centroids" identifies subsets of genes that best characterize each class. The technique is general and can be used in many other classification problems. To demonstrate its effectiveness, we show that the method was highly efficient in finding genes for classifying small round blue cell tumors and leukemias.},
- author = {Tibshirani, Robert and Hastie, Trevor and Narasimhan, Balasubramanian and Chu, Gilbert},
- doi = {10.1073/pnas.082099299},
- file = {:Users/ryan/Documents/Mendeley Desktop/Tibshirani et al. - 2002 - Diagnosis of multiple cancer types by shrunken centroids of gene expression.pdf:pdf},
- issn = {0027-8424},
- journal = {Proceedings of the National Academy of Sciences of the United States of America},
- keywords = {Child,DNA, Neoplasm,DNA, Neoplasm: analysis,Discriminant Analysis,Gene Expression,Gene Expression Profiling,Humans,Neoplasms,Neoplasms: classification,Neoplasms: diagnosis,Neoplasms: genetics,Precursor Cell Lymphoblastic Leukemia-Lymphoma,Precursor Cell Lymphoblastic Leukemia-Lymphoma: cl,Precursor Cell Lymphoblastic Leukemia-Lymphoma: di,Precursor Cell Lymphoblastic Leukemia-Lymphoma: ge,Probability},
- month = may,
- number = {10},
- pages = {6567--6572},
- pmid = {12011421},
- title = {Diagnosis of multiple cancer types by shrunken centroids of gene expression},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=124443{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {99},
- year = {2002}
- }
- @article{Liao2013,
- abstract = {Read alignment is an ongoing challenge for the analysis of data from sequencing technologies. This article proposes an elegantly simple multi-seed strategy, called seed-and-vote, for mapping reads to a reference genome. The new strategy chooses the mapped genomic location for the read directly from the seeds. It uses a relatively large number of short seeds (called subreads) extracted from each read and allows all the seeds to vote on the optimal location. When the read length is {\textless}160 bp, overlapping subreads are used. More conventional alignment algorithms are then used to fill in detailed mismatch and indel information between the subreads that make up the winning voting block. The strategy is fast because the overall genomic location has already been chosen before the detailed alignment is done. It is sensitive because no individual subread is required to map exactly, nor are individual subreads constrained to map close by other subreads. It is accurate because the final location must be supported by several different subreads. The strategy extends easily to find exon junctions, by locating reads that contain sets of subreads mapping to different exons of the same gene. It scales up efficiently for longer reads.},
- author = {Liao, Yang and Smyth, Gordon K. and Shi, Wei},
- doi = {10.1093/nar/gkt214},
- file = {:Users/ryan/Documents/Mendeley Desktop/Liao, Smyth, Shi - 2013 - The Subread aligner fast, accurate and scalable read mapping by seed-and-vote(2).pdf:pdf;:Users/ryan/Documents/Mendeley Desktop/Liao, Smyth, Shi - 2013 - The Subread aligner fast, accurate and scalable read mapping by seed-and-vote.pdf:pdf},
- issn = {1362-4962},
- journal = {Nucleic Acids Research},
- keywords = {Exons,Genomics,High-Throughput Nucleotide Sequencing,INDEL Mutation,Sequence Alignment,Sequence Alignment: methods,Software},
- month = may,
- number = {10},
- pages = {e108},
- pmid = {23558742},
- title = {The {Subread} aligner: fast, accurate and scalable read mapping by seed-and-vote},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=3664803{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {41},
- year = {2013}
- }
- @article{Weng2012,
- abstract = {How the immune system remembers a previous encounter with a pathogen and responds more efficiently to a subsequent encounter has been one of the central enigmas for immunologists for over a century. The identification of pathogen-specific memory lymphocytes that arise after an infection provided a cellular basis for immunological memory. But the molecular mechanisms of immunological memory remain only partially understood. The emerging evidence suggests that epigenetic changes have a key role in controlling the distinct transcriptional profiles of memory lymphocytes and thus in shaping their function. In this Review, we summarize the recent progress that has been made in assessing the differential gene expression and chromatin modifications in memory CD4(+) and CD8(+) T cells, and we present our current understanding of the molecular basis of memory T cell function.},
- author = {Weng, Nan-Ping and Araki, Yasuto and Subedi, Kalpana},
- doi = {10.1038/nri3173},
- file = {:Users/ryan/Documents/Mendeley Desktop/Weng, Araki, Subedi - 2012 - The molecular basis of the memory T cell response differential gene expression and its epigenetic regulatio.pdf:pdf},
- issn = {1474-1741},
- journal = {Nature Reviews Immunology},
- month = mar,
- number = {4},
- pages = {306--315},
- pmid = {22421787},
- publisher = {Nature Publishing Group},
- title = {The molecular basis of the memory {T} cell response: differential gene expression and its epigenetic regulation},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/22421787},
- volume = {12},
- year = {2012}
- }
- @article{Singh2011,
- abstract = {In eukaryotic cells, alternative splicing expands the diversity of RNA transcripts and plays an important role in tissue-specific differentiation, and can be misregulated in disease. To understand these processes, there is a great need for methods to detect differential transcription between samples. Our focus is on samples observed using short-read RNA sequencing (RNA-seq).},
- author = {Singh, Darshan and Orellana, Christian F and Hu, Yin and Jones, Corbin D and Liu, Yufeng and Chiang, Derek Y and Liu, Jinze and Prins, Jan F},
- doi = {10.1093/bioinformatics/btr458},
- file = {:Users/ryan/Documents/Mendeley Desktop/Singh et al. - 2011 - FDM a graph-based statistical method to detect differential transcription using RNA-seq data.pdf:pdf},
- issn = {1367-4811},
- journal = {Bioinformatics},
- keywords = {Alternative Splicing,Gene Expression Profiling,Gene Expression Profiling: methods,Genome,Humans,Models, Genetic,Protein Isoforms,Protein Isoforms: genetics,RNA,RNA: genetics,Sequence Analysis, RNA,Sequence Analysis, RNA: methods,Transcription, Genetic,Transcriptome,Transcriptome: genetics},
- month = oct,
- number = {19},
- pages = {2633--2640},
- pmid = {21824971},
- title = {{FDM}: a graph-based statistical method to detect differential transcription using {RNA-seq} data},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=3179659{\&}tool=pmcentrez{\&}rendertype=abstract},
- volume = {27},
- year = {2011}
- }
- @article{Voss2011,
- abstract = {The glucocorticoid receptor (GR), like other eukaryotic transcription factors, regulates gene expression by interacting with chromatinized DNA response elements. Photobleaching experiments in living cells indicate that receptors transiently interact with DNA on the time scale of seconds and predict that the response elements may be sparsely occupied on average. Here, we show that the binding of one receptor at the glucocorticoid response element (GRE) does not reduce the steady-state binding of another receptor variant to the same GRE. Mathematical simulations reproduce this noncompetitive state using short GR/GRE residency times and relatively long times between DNA binding events. At many genomic sites where GR binding causes increased chromatin accessibility, concurrent steady-state binding levels for the variant receptor are actually increased, a phenomenon termed assisted loading. Temporally sparse transcription factor-DNA interactions induce local chromatin reorganization, resulting in transient access for binding of secondary regulatory factors.},
- author = {Voss, Ty C and Schiltz, R Louis and Sung, Myong-Hee and Yen, Paul M and Stamatoyannopoulos, John A and Biddie, Simon C and Johnson, Thomas A and Miranda, Tina B and John, Sam and Hager, Gordon L},
- doi = {10.1016/j.cell.2011.07.006},
- file = {:Users/ryan/Documents/Mendeley Desktop/Voss et al. - 2011 - Dynamic exchange at regulatory elements during chromatin remodeling underlies assisted loading mechanism.pdf:pdf},
- issn = {1097-4172},
- journal = {Cell},
- keywords = {Adenosine Triphosphate,Adenosine Triphosphate: metabolism,Animals,Cell Line, Tumor,Chromatin Assembly and Disassembly,Mammary Tumor Virus, Mouse,Mice,Models, Biological,Monte Carlo Method,Nucleosomes,Nucleosomes: metabolism,Receptors, Estrogen,Receptors, Estrogen: metabolism,Receptors, Glucocorticoid,Receptors, Glucocorticoid: metabolism,Regulatory Sequences, Nucleic Acid,Response Elements,Transcription Factors,Transcription Factors: metabolism},
- month = aug,
- number = {4},
- pages = {544--554},
- pmid = {21835447},
- publisher = {Elsevier Inc.},
- title = {Dynamic exchange at regulatory elements during chromatin remodeling underlies assisted loading mechanism},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/21835447},
- volume = {146},
- year = {2011}
- }
- @article{Kowalski2003,
- abstract = {Each year, 55 000 organ transplants are performed worldwide. Cumulatively, the number of living organ recipients is now estimated to be over 300 000. Most of these transplant recipients will remain on immunosuppressive drugs for the remainder of their lives to prevent rejection episodes. Controlled doses of these drugs are required to prevent over-medication, which may leave the patient susceptible to opportunistic infection and drug toxicity effects, or under-dosing, which may lead to shortened graft survival because of rejection episodes. This paper describes the result of a multicenter study conducted at the Universities of Pittsburgh, Alabama and Maryland to evaluate an in vitro assay (CylexTM Immune Cell Function Assay) for the measurement of global immune response in transplant patients receiving immunosuppressive therapy. The assay uses a whole blood sample to maintain the presence of the drug during incubation. Following overnight incubation of blood with phytohemagglutinin (PHA), CD4 cells are selected using paramagnetic particles coated with a monoclonal antibody to the CD4 epitope. The CD4-positive cells are targeted as major immunosuppressive drugs are designed to specifically inhibit T-cell activation which has been implicated in rejection. The data generated at these three sites were submitted in support of an Food and Drug Association (FDA) application for the use of this assay in the detection of cell-mediated immunity in an immunosuppressed population. The assay was cleared by the FDA on April 2, 2002. This cross-sectional study was designed to establish ranges for reactivity of this bioassay in the assessment of functional immunity for an individual solid organ recipient at any point in time.},
- author = {Kowalski, Richard and Post, Diane and Schneider, Mary C. and Britz, Judith and Thomas, Judy and Deierhoi, Mark and Lobashevsky, Andrew and Redfield, Robert and Schweitzer, Eugene and Heredia, Alonso and Reardon, Elise and Davis, Charles and Bentlejewski, Carol and Fung, John and Shapiro, Ron and Zeevi, Adriana},
- doi = {10.1034/j.1399-0012.2003.00013.x},
- file = {:Users/ryan/Documents/Mendeley Desktop/Kowalski et al. - 2003 - Immune cell function testing An adjunct to therapeutic drug monitoring in transplant patient management.pdf:pdf},
- issn = {0902-0063},
- journal = {Clinical Transplantation},
- keywords = {Functional immunosuppression,Immune cell function testing,Immune monitoring,Immune response,Immunosuppression,Therapeutic drug monitoring,Transplant patient management,Transplant rejection},
- number = {2},
- pages = {77--88},
- title = {Immune cell function testing: An adjunct to therapeutic drug monitoring in transplant patient management},
- volume = {17},
- year = {2003}
- }
- @article{Salzman2012,
- abstract = {Most human pre-mRNAs are spliced into linear molecules that retain the exon order defined by the genomic sequence. By deep sequencing of RNA from a variety of normal and malignant human cells, we found RNA transcripts from many human genes in which the exons were arranged in a non-canonical order. Statistical estimates and biochemical assays provided strong evidence that a substantial fraction of the spliced transcripts from hundreds of genes are circular RNAs. Our results suggest that a non-canonical mode of RNA splicing, resulting in a circular RNA isoform, is a general feature of the gene expression program in human cells.},
- author = {Salzman, Julia and Gawad, Charles and Wang, Peter Lincoln and Lacayo, Norman and Brown, Patrick O},
- doi = {10.1371/journal.pone.0030733},
- file = {:Users/ryan/Documents/Mendeley Desktop/Salzman et al. - 2012 - Circular RNAs Are the Predominant Transcript Isoform from Hundreds of Human Genes in Diverse Cell Types.pdf:pdf},
- issn = {1932-6203},
- journal = {PLoS ONE},
- month = jan,
- number = {2},
- pages = {e30733},
- pmid = {22319583},
- title = {Circular {RNAs} Are the Predominant Transcript Isoform from Hundreds of Human Genes in Diverse Cell Types},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=3270023&tool=pmcentrez&rendertype=abstract},
- volume = {7},
- year = {2012}
- }
- @article{Hussey2017,
- abstract = {Despite the considerable contribution of xylem development (xylogenesis) to plant biomass accumulation, its epigenetic regulation is poorly understood. Furthermore, the relative contributions of histone modifications to transcriptional regulation is not well studied in plants. We investigated the biological relevance of H3K4me3 and H3K27me3 in secondary xylem development using ChIP-seq and their association with transcript levels among other histone modifications in woody and herbaceous models. In developing secondary xylem of the woody model Eucalyptus grandis, H3K4me3 and H3K27me3 genomic spans were distinctly associated with xylogenesis-related processes, with (late) lignification pathways enriched for putative bivalent domains, but not early secondary cell wall polysaccharide deposition. H3K27me3-occupied genes, of which 753 ({\~{}}31{\%}) are novel targets, were enriched for transcriptional regulation and flower development and had significant preferential expression in roots. Linear regression models of the ChIP-seq profiles predicted {\~{}}50{\%} of transcript abundance measured with strand-specific RNA-seq, confirmed in a parallel analysis in Arabidopsis where integration of seven additional histone modifications each contributed smaller proportions of unique information to the predictive models. This study uncovers the biological importance of histone modification antagonism and genomic span in xylogenesis and quantifies for the first time the relative correlations of histone modifications with transcript abundance in plants.},
- author = {Hussey, Steven G. and Loots, Mattheus T. and {Van Der Merwe}, Karen and Mizrachi, Eshchar and Myburg, Alexander A.},
- doi = {10.1038/s41598-017-03665-1},
- file = {:Users/ryan/Documents/Mendeley Desktop/Hussey et al. - 2017 - Integrated analysis and transcript abundance modelling of H3K4me3 and H3K27me3 in developing secondary xylem.pdf:pdf},
- issn = {2045-2322},
- journal = {Scientific Reports},
- number = {1},
- pages = {1--14},
- pmid = {28611454},
- publisher = {Springer US},
- title = {Integrated analysis and transcript abundance modelling of {H3K4me3} and {H3K27me3} in developing secondary xylem},
- url = {https://www.nature.com/articles/s41598-017-03665-1.pdf},
- volume = {7},
- year = {2017}
- }
- @article{Witten2011,
- archivePrefix = {arXiv},
- arxivId = {1202.6201v1},
- author = {Witten, Daniela M.},
- doi = {10.1214/11-AOAS493},
- eprint = {1202.6201v1},
- file = {:Users/ryan/Documents/Mendeley Desktop/Witten - 2011 - Classification and clustering of sequencing data using a Poisson model.pdf:pdf},
- issn = {1932-6157},
- journal = {The Annals of Applied Statistics},
- keywords = {Classification,clustering,genomics,gene expression},
- month = dec,
- number = {4},
- pages = {2493--2518},
- title = {Classification and clustering of sequencing data using a {Poisson} model},
- url = {http://projecteuclid.org/euclid.aoas/1324399604},
- volume = {5},
- year = {2011}
- }
- @article{Jin2007,
- abstract = {Nucleosomes containing the histone variant H3.3 tend to be clustered in vivo in the neighborhood of transcriptionally active genes and over regulatory elements. It has not been clear, however, whether H3.3-containing nucleosomes possess unique properties that would affect transcription. We report here that H3.3 nucleosomes isolated from vertebrates, regardless of whether they are partnered with H2A or H2A.Z, are unusually sensitive to salt-dependent disruption, losing H2A/H2B or H2A.Z/H2B dimers. Immunoprecipitation studies of nucleosome core particles (NCPs) show that NCPs that contain both H3.3 and H2A.Z are even less stable than NCPs containing H3.3 and H2A. Intriguingly, NCPs containing H3 and H2A.Z are at least as stable as H3/H2A NCPs. These results establish an hierarchy of stabilities for native nucleosomes carrying different complements of variants, and suggest how H2A.Z could play different roles depending on its partners within the NCP. They also are consistent with the idea that H3.3 plays an active role in maintaining accessible chromatin structures in enhancer regions and transcribed regions. Consistent with this idea, promoters and enhancers at transcriptionally active genes and coding regions at highly expressed genes have nucleosomes that simultaneously carry both H3.3 and H2A.Z, and should therefore be extremely sensitive to disruption.},
- author = {Jin, Chunyuan and Felsenfeld, Gary},
- doi = {10.1101/gad.1547707},
- file = {:Users/ryan/Documents/Mendeley Desktop/Jin, Felsenfeld - 2007 - Nucleosome stability mediated by histone variants H3.3 and H2A.Z.pdf:pdf},
- issn = {0890-9369},
- journal = {Genes and Development},
- keywords = {Histone H2A.Z,Histone H3.3,Nucleosome structure},
- number = {12},
- pages = {1519--1529},
- title = {Nucleosome stability mediated by histone variants {H3.3} and {H2A.Z}},
- volume = {21},
- year = {2007}
- }
- @techreport{Phipson2013,
- address = {Melbourne, Australia},
- author = {Phipson, Belinda and Lee, Stanley and Majewski, Ian J and Alexander, Warren S and Smyth, Gordon K},
- file = {:Users/ryan/Documents/Mendeley Desktop/Phipson et al. - 2013 - Empirical Bayes in the presence of exceptional cases , with application to microarray data.pdf:pdf},
- institution = {Bioinformatics Division, Walter and Eliza Hall Institute of Medical Research},
- keywords = {empirical bayes,gene expression,microarrays,outliers,robustness},
- pages = {1--18},
- title = {Empirical {Bayes} in the presence of exceptional cases, with application to microarray data},
- url = {https://pdfs.semanticscholar.org/5db8/fa5a6cce0c0cbbbfc063e622bb4d9d87a994.pdf},
- year = {2013}
- }
- @article{Li2011,
- abstract = {Reproducibility is essential to reliable scientific discovery in high-throughput experiments. In this work we propose a unified approach to measure the reproducibility of findings identified from replicate experiments and identify putative discoveries using reproducibility. Unlike the usual scalar measures of reproducibility, our approach creates a curve, which quantitatively assesses when the findings are no longer consistent across replicates. Our curve is fitted by a copula mixture model, from which we derive a quantitative reproducibility score, which we call the "irreproducible discovery rate" (IDR) analogous to the FDR. This score can be computed at each set of paired replicate ranks and permits the principled setting of thresholds both for assessing reproducibility and combining replicates. Since our approach permits an arbitrary scale for each replicate, it provides useful descriptive measures in a wide variety of situations to be explored. We study the performance of the algorithm using simulations and give a heuristic analysis of its theoretical properties. We demonstrate the effectiveness of our method in a ChIP-seq experiment. {\textcopyright} Institute of Mathematical Statistics, 2011.},
- author = {Li, Qunhua and Brown, James B. and Huang, Haiyan and Bickel, Peter J.},
- doi = {10.1214/11-AOAS466},
- file = {:Users/ryan/Documents/Mendeley Desktop/Li et al. - 2011 - Measuring reproducibility of high-throughput experiments.pdf:pdf},
- issn = {1932-6157},
- journal = {The Annals of Applied Statistics},
- keywords = {Association,Copula,Genomics,High-throughput experiment,Irreproducible discovery rate,Iterative algorithm,Mixture model,Reproducibility},
- month = sep,
- number = {3},
- pages = {1752--1779},
- title = {Measuring reproducibility of high-throughput experiments},
- url = {http://projecteuclid.org/euclid.aoas/1318514284},
- volume = {5},
- year = {2011}
- }
- @incollection{Cohen1988,
- address = {Hillsdale, NJ},
- author = {Cohen, Jacob},
- booktitle = {Statistical Power Analysis for the Behavioral Sciences},
- chapter = {8},
- edition = {Second},
- file = {:Users/ryan/Documents/Mendeley Desktop/Cohen - 1988 - The Analysis of Variance.pdf:pdf},
- isbn = {0-8058-0283-5},
- keywords = {statistics},
- mendeley-tags = {statistics},
- pages = {273--407},
- publisher = {Lawrence Erlbaum Associates},
- title = {The Analysis of Variance},
- url = {http://www.utstat.toronto.edu/~brunner/oldclass/378f16/readings/CohenPower.pdf},
- year = {1988}
- }
- @article{Caplan2017,
- abstract = {Mesenchymal stem cells (MSCs) were officially named more than 25 years ago to represent a class of cells from human and mammalian bone marrow and periosteum that could be isolated and expanded in culture while maintaining their in vitro capacity to be induced to form a variety of mesodermal phenotypes and tissues. The in vitro capacity to form bone, cartilage, fat, etc., became an assay for identifying this class of multipotent cells and around which several companies were formed in the 1990s to medically exploit the regenerative capabilities of MSCs. Today, there are hundreds of clinics and hundreds of clinical trials using human MSCs with very few, if any, focusing on the in vitro multipotential capacities of these cells. Unfortunately, the fact that MSCs are called "stem cells" is being used to infer that patients will receive direct medical benefit, because they imagine that these cells will differentiate into regenerating tissue-producing cells. Such a stem cell treatment will presumably cure the patient of their medically relevant difficulties ranging from osteoarthritic (bone-on-bone) knees to various neurological maladies including dementia. I now urge that we change the name of MSCs to Medicinal Signaling Cells to more accurately reflect the fact that these cells home in on sites of injury or disease and secrete bioactive factors that are immunomodulatory and trophic (regenerative) meaning that these cells make therapeutic drugs in situ that are medicinal. It is, indeed, the patient's own site-specific and tissue-specific resident stem cells that construct the new tissue as stimulated by the bioactive factors secreted by the exogenously supplied MSCs. Stem Cells Translational Medicine 2017;6:1445-1451.},
- author = {Caplan, Arnold I.},
- doi = {10.1002/sctm.17-0051},
- file = {:Users/ryan/Documents/Mendeley Desktop/Caplan - 2017 - Mesenchymal Stem Cells Time to Change the Name!.pdf:pdf},
- issn = {2157-6564},
- journal = {STEM CELLS Translational Medicine},
- keywords = {MSCs,Medicinal signaling cells,Mesenchymal stem cells,Regenerative medicine},
- month = jun,
- number = {6},
- pages = {1445--1451},
- title = {Mesenchymal Stem Cells: Time to Change the Name!},
- url = {http://doi.wiley.com/10.1002/sctm.17-0051},
- volume = {6},
- year = {2017}
- }
- @article{Bartholomew2009,
- abstract = {Mesenchymal stem cells directly suppress ongoing immune responses. Through production of toleragenic cytokines, inhibition of lymphocyte proliferation, delivery of reparative and protective signals after reperfusion injury, and facilitation of hematopoietic chimerism, these cells demonstrate a wide-ranging potential for the development of multifaceted toleragenic strategies after transplantation.},
- author = {Bartholomew, Amelia and Polchert, David and Szilagyi, Erzsebet and Douglas, G. W. and Kenyon, Norma},
- doi = {10.1097/TP.0b013e3181a287e6},
- file = {:Users/ryan/Documents/Mendeley Desktop/Bartholomew et al. - 2009 - Mesenchymal Stem Cells in the Induction of Transplantation Tolerance.pdf:pdf},
- issn = {0041-1337},
- journal = {Transplantation},
- keywords = {mesenchymal stem cells,tolerance,transplantation},
- month = may,
- number = {Supplement},
- pages = {S55--S57},
- title = {Mesenchymal Stem Cells in the Induction of Transplantation Tolerance},
- url = {https://insights.ovid.com/crossref?an=00007890-200905151-00008},
- volume = {87},
- year = {2009}
- }
- @article{LaMere2016,
- abstract = {The epigenetic determinants driving the responses of CD4 T cells to antigen are currently an area of active research. Much has been done to characterize helper T-cell subsets and their associated genome-wide epigenetic patterns. In contrast, little is known about the dynamics of histone modifications during CD4 T-cell activation and the differential kinetics of these epigenetic marks between naive and memory T cells. In this study, we have detailed the dynamics of genome-wide promoter H3K4me2 and H3K4me3 over a time course during activation of human naive and memory CD4 T cells. Our results demonstrate that changes to H3K4 methylation occur relatively late after activation (5 days) and reinforce activation-induced upregulation of gene expression, affecting multiple pathways important to T-cell activation, differentiation and function. The dynamics and mapped pathways of H3K4 methylation are distinctly different in memory cells, which have substantially more promoters marked by H3K4me3 alone, reinforcing their more differentiated state. Our study provides the first data examining genome-wide histone modification dynamics during CD4 T-cell activation, providing insight into the cross talk between H3K4 methylation and gene expression, and underscoring the impact of these marks upon key pathways integral to CD4 T-cell activation and function.},
- author = {LaMere, S. A. and Thompson, R. C. and Komori, H. K. and Mark, A. and Salomon, D. R.},
- doi = {10.1038/gene.2016.19},
- file = {:Users/ryan/Documents/Mendeley Desktop/LaMere et al. - 2016 - Promoter H3K4 methylation dynamically reinforces activation-induced pathways in human CD4 T cells.pdf:pdf;:Users/ryan/Documents/Mendeley Desktop/LaMere et al. - 2016 - Promoter H3K4 methylation dynamically reinforces activation-induced pathways in human CD4 T cells.ppt:ppt},
- issn = {1476-5470},
- journal = {Genes and Immunity},
- month = jul,
- number = {5},
- pages = {283--297},
- pmcid = {PMC4956548},
- pmid = {27170561},
- publisher = {Nature Publishing Group},
- title = {Promoter {H3K4} methylation dynamically reinforces activation-induced pathways in human {CD4} {T} cells},
- url = {http://www.nature.com/articles/gene201619},
- volume = {17},
- year = {2016}
- }
- @article{Mastrokolias2012,
- abstract = {Transcriptome analysis is of great interest in clinical research, where significant differences between individuals can be translated into biomarkers of disease. Although next generation sequencing provides robust, comparable and highly informative expression profiling data, with several million of tags per blood sample, reticulocyte globin transcripts can constitute up to 76{\%} of total mRNA compromising the detection of low abundant transcripts. We have removed globin transcripts from 6 human whole blood RNA samples with a human globin reduction kit and compared them with the same non-reduced samples using deep Serial Analysis of Gene Expression.},
- author = {Mastrokolias, Anastasios and den Dunnen, Johan T and van Ommen, Gertjan B and {'t Hoen}, Peter A C and van Roon-Mom, Willeke M C},
- doi = {10.1186/1471-2164-13-28},
- file = {:Users/ryan/Documents/Mendeley Desktop/Mastrokolias et al. - 2012 - Increased sensitivity of next generation sequencing-based expression profiling after globin reduction in hu.pdf:pdf},
- issn = {1471-2164},
- journal = {BMC Genomics},
- keywords = {Adult,Aged,Female,Gene Expression Profiling,Globins,Globins: genetics,Globins: metabolism,Humans,Male,Middle Aged,RNA,RNA: blood,RNA: genetics,Sensitivity and Specificity,Sequence Analysis, RNA,Signal Transduction},
- month = jan,
- number = {1},
- pages = {28},
- pmid = {22257641},
- publisher = {BioMed Central Ltd},
- title = {Increased sensitivity of next generation sequencing-based expression profiling after globin reduction in human blood {RNA}},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=3275489&tool=pmcentrez&rendertype=abstract},
- volume = {13},
- year = {2012}
- }
- @article{Simmons2011,
- abstract = {In this article, we accomplish two things. First, we show that despite empirical psychologists' nominal endorsement of a low rate of false-positive findings ({$\leq$} .05), flexibility in data collection, analysis, and reporting dramatically increases actual false-positive rates. In many cases, a researcher is more likely to falsely find evidence that an effect exists than to correctly find evidence that it does not. We present computer simulations and a pair of actual experiments that demonstrate how unacceptably easy it is to accumulate (and report) statistically significant evidence for a false hypothesis. Second, we suggest a simple, low-cost, and straightforwardly effective disclosure-based solution to this problem. The solution involves six concrete requirements for authors and four guidelines for reviewers, all of which impose a minimal burden on the publication process.},
- author = {Simmons, Joseph P. and Nelson, Leif D. and Simonsohn, Uri},
- doi = {10.1177/0956797611417632},
- file = {:Users/ryan/Documents/Mendeley Desktop/Simmons, Nelson, Simonsohn - 2011 - False-Positive Psychology.pdf:pdf},
- issn = {0956-7976},
- journal = {Psychological Science},
- keywords = {disclosure,methodology,motivated reasoning,publication},
- month = nov,
- number = {11},
- pages = {1359--1366},
- title = {False-Positive Psychology: Undisclosed Flexibility in Data Collection and Analysis Allows Presenting Anything as Significant},
- url = {http://journals.sagepub.com/doi/10.1177/0956797611417632},
- volume = {22},
- year = {2011}
- }
- @article{Young2011,
- abstract = {Transcriptional control is dependent on a vast network of epigenetic modifications. One epigenetic mark of particular interest is tri-methylation of lysine 27 on histone H3 (H3K27me3), which is catalysed and maintained by Polycomb Repressive Complex 2 (PRC2). Although this histone mark is studied widely, the precise relationship between its local pattern of enrichment and regulation of gene expression is currently unclear. We have used ChIP-seq to generate genome-wide maps of H3K27me3 enrichment, and have identified three enrichment profiles with distinct regulatory consequences. First, a broad domain of H3K27me3 enrichment across the body of genes corresponds to the canonical view of H3K27me3 as inhibitory to transcription. Second, a peak of enrichment around the transcription start site (TSS) is commonly associated with 'bivalent' genes, where H3K4me3 also marks the TSS. Finally and most surprisingly, we identified an enrichment profile with a peak in the promoter of genes that is associated with active transcription. Genes with each of these three profiles were found in different proportions in each of the cell types studied. The data analysis techniques developed here will be useful for the identification of common enrichment profiles for other histone modifications that have important consequences for transcriptional regulation.},
- author = {Young, Matthew D. and Willson, Tracy A. and Wakefield, Matthew J. and Trounson, Evelyn and Hilton, Douglas J. and Blewitt, Marnie E. and Oshlack, Alicia and Majewski, Ian J.},
- doi = {10.1093/nar/gkr416},
- file = {:Users/ryan/Documents/Mendeley Desktop/Young et al. - 2011 - ChIP-seq analysis reveals distinct H3K27me3 profiles that correlate with transcriptional activity(2).pdf:pdf},
- issn = {1362-4962},
- journal = {Nucleic Acids Research},
- month = sep,
- number = {17},
- pages = {7415--7427},
- pmid = {21652639},
- title = {{ChIP-seq} analysis reveals distinct {H3K27me3} profiles that correlate with transcriptional activity},
- url = {https://academic.oup.com/nar/article-lookup/doi/10.1093/nar/gkr416},
- volume = {39},
- year = {2011}
- }
- @article{Conesa2016,
- abstract = {RNA-sequencing (RNA-seq) has a wide variety of applications, but no single analysis pipeline can be used in all cases. We review all of the major steps in RNA-seq data analysis, including experimental design, quality control, read alignment, quantification of gene and transcript levels, visualization, differential gene expression, alternative splicing, functional analysis, gene fusion detection and eQTL mapping. We highlight the challenges associated with each step. We discuss the analysis of small RNAs and the integration of RNA-seq with other functional genomics techniques. Finally, we discuss the outlook for novel technologies that are changing the state of the art in transcriptomics.},
- author = {Conesa, Ana and Madrigal, Pedro and Tarazona, Sonia and Gomez-Cabrero, David and Cervera, Alejandra and McPherson, Andrew and Szcze{\'{s}}niak, Michal Wojciech and Gaffney, Daniel J. and Elo, Laura L. and Zhang, Xuegong and Mortazavi, Ali},
- doi = {10.1186/s13059-016-0881-8},
- file = {:Users/ryan/Documents/Mendeley Desktop/Conesa et al. - 2016 - A survey of best practices for RNA-seq data analysis.pdf:pdf},
- issn = {1474-760X},
- journal = {Genome Biology},
- month = jan,
- number = {1},
- pages = {13},
- publisher = {BioMed Central Ltd.},
- title = {A survey of best practices for {RNA-seq} data analysis},
- volume = {17},
- year = {2016}
- }
- @article{Li2001,
- abstract = {BACKGROUND A model-based analysis of oligonucleotide expression arrays we developed previously uses a probe-sensitivity index to capture the response characteristic of a specific probe pair and calculates model-based expression indexes (MBEI). MBEI has standard error attached to it as a measure of accuracy. Here we investigate the stability of the probe-sensitivity index across different tissue types, the reproducibility of results in replicate experiments, and the use of MBEI in perfect match (PM)-only arrays. RESULTS Probe-sensitivity indexes are stable across tissue types. The target gene's presence in many arrays of an array set allows the probe-sensitivity index to be estimated accurately. We extended the model to obtain expression values for PM-only arrays, and found that the 20-probe PM-only model is comparable to the 10-probe PM/MM difference model, in terms of the expression correlations with the original 20-probe PM/MM difference model. MBEI method is able to extend the reliable detection limit of expression to a lower mRNA concentration. The standard errors of MBEI can be used to construct confidence intervals of fold changes, and the lower confidence bound of fold change is a better ranking statistic for filtering genes. We can assign reliability indexes for genes in a specific cluster of interest in hierarchical clustering by resampling clustering trees. A software dChip implementing many of these analysis methods is made available. CONCLUSIONS The model-based approach reduces the variability of low expression estimates, and provides a natural method of calculating expression values for PM-only arrays. The standard errors attached to expression values can be used to assess the reliability of downstream analysis.},
- author = {Li, Cheng and Wong, Wing Hung},
- doi = {10.1186/gb-2001-2-8-research0032},
- file = {:Users/ryan/Documents/Mendeley Desktop/Li, Hung Wong - 2001 - Model-based analysis of oligonucleotide arrays model validation, design issues and standard error application.pdf:pdf},
- issn = {1474-760X},
- journal = {Genome Biology},
- keywords = {Cluster Analysis,DNA Probes,Gene Expression Profiling,Gene Expression Profiling: methods,Gene Expression Profiling: standards,Humans,Messenger,Messenger: analysis,Messenger: genetics,Models,Oligonucleotide Array Sequence Analysis,Oligonucleotide Array Sequence Analysis: methods,Oligonucleotide Array Sequence Analysis: standards,Organ Specificity,RNA,Reproducibility of Results,Research Design,Sensitivity and Specificity,Software,Statistical},
- month = jan,
- number = {8},
- pages = {RESEARCH0032},
- pmcid = {PMC55329},
- pmid = {11532216},
- title = {Model-based analysis of oligonucleotide arrays: model validation, design issues and standard error application},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/11532216},
- volume = {2},
- year = {2001}
- }
- @article{Kurian2014,
- abstract = {There are no minimally invasive diagnostic metrics for acute kidney transplant rejection (AR), especially in the setting of the common confounding diagnosis, acute dysfunction with no rejection (ADNR). Thus, though kidney transplant biopsies remain the gold standard, they are invasive, have substantial risks, sampling error issues and significant costs and are not suitable for serial monitoring. Global gene expression profiles of 148 peripheral blood samples from transplant patients with excellent function and normal histology (TX; n = 46), AR (n = 63) and ADNR (n = 39), from two independent cohorts were analyzed with DNA microarrays. We applied a new normalization tool, frozen robust multi-array analysis, particularly suitable for clinical diagnostics, multiple prediction tools to discover, refine and validate robust molecular classifiers and we tested a novel one-by-one analysis strategy to model the real clinical application of this test. Multiple three-way classifier tools identified 200 highest value probesets with sensitivity, specificity, positive predictive value, negative predictive value and area under the curve for the validation cohort ranging from 82{\%} to 100{\%}, 76{\%} to 95{\%}, 76{\%} to 95{\%}, 79{\%} to 100{\%}, 84{\%} to 100{\%} and 0.817 to 0.968, respectively. We conclude that peripheral blood gene expression profiling can be used as a minimally invasive tool to accurately reveal TX, AR and ADNR in the setting of acute kidney transplant dysfunction.},
- author = {Kurian, S M and Williams, A N and Gelbart, T and Campbell, D and Mondala, T S and Head, S R and Horvath, S and Gaber, L and Thompson, R and Whisenant, T and Lin, W and Langfelder, P and Robison, E H and Schaffer, R L and Fisher, J S and Friedewald, J and Flechner, S M and Chan, L K and Wiseman, A C and Shidban, H and Mendez, R and Heilman, R and Abecassis, M M and Marsh, C L and Salomon, D R},
- doi = {10.1111/ajt.12671},
- file = {:Users/ryan/Documents/Mendeley Desktop/Kurian et al. - 2014 - Molecular classifiers for acute kidney transplant rejection in peripheral blood by whole genome gene expression p.pdf:pdf},
- issn = {1600-6135},
- journal = {American Journal of Transplantation},
- keywords = {gene expression profiling,kidney rejection,microarrays,molecular classifiers},
- month = may,
- number = {5},
- pages = {1164--1172},
- pmid = {24725967},
- title = {Molecular Classifiers for Acute Kidney Transplant Rejection in Peripheral Blood by Whole Genome Gene Expression Profiling},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/24725967},
- volume = {14},
- year = {2014}
- }
- @article{Guo2010,
- abstract = {BACKGROUND: data generated using 'omics' technologies are characterized by high dimensionality, where the number of features measured per subject vastly exceeds the number of subjects in the study. In this paper, we consider issues relevant in the design of biomedical studies in which the goal is the discovery of a subset of features and an associated algorithm that can predict a binary outcome, such as disease status. We compare the performance of four commonly used classifiers (K-Nearest Neighbors, Prediction Analysis for Microarrays, Random Forests and Support Vector Machines) in high-dimensionality data settings. We evaluate the effects of varying levels of signal-to-noise ratio in the dataset, imbalance in class distribution and choice of metric for quantifying performance of the classifier. To guide study design, we present a summary of the key characteristics of 'omics' data profiled in several human or animal model experiments utilizing high-content mass spectrometry and multiplexed immunoassay based techniques.
- RESULTS: the analysis of data from seven 'omics' studies revealed that the average magnitude of effect size observed in human studies was markedly lower when compared to that in animal studies. The data measured in human studies were characterized by higher biological variation and the presence of outliers. The results from simulation studies indicated that the classifier Prediction Analysis for Microarrays (PAM) had the highest power when the class conditional feature distributions were Gaussian and outcome distributions were balanced. Random Forests was optimal when feature distributions were skewed and when class distributions were unbalanced. We provide a free open-source R statistical software library (MVpower) that implements the simulation strategy proposed in this paper.
- CONCLUSION: no single classifier had optimal performance under all settings. Simulation studies provide useful guidance for the design of biomedical studies involving high-dimensionality data.},
- author = {Guo, Yu and Graber, Armin and McBurney, Robert N and Balasubramanian, Raji},
- doi = {10.1186/1471-2105-11-447},
- file = {:Users/ryan/Documents/Mendeley Desktop/Guo et al. - 2010 - Sample size and statistical power considerations in high-dimensionality data settings a comparative study of classif.pdf:pdf},
- issn = {1471-2105},
- journal = {BMC Bioinformatics},
- keywords = {Algorithms,Animals,Classification,Classification: methods,Databases, Factual,Gene Expression Profiling,Gene Expression Profiling: methods,Humans,Models, Statistical,Oligonucleotide Array Sequence Analysis,Oligonucleotide Array Sequence Analysis: methods,Pattern Recognition, Automated,Sample Size},
- month = jan,
- pages = {447},
- pmid = {20815881},
- title = {Sample size and statistical power considerations in high-dimensionality data settings: a comparative study of classification algorithms},
- url = {http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=2942858&tool=pmcentrez&rendertype=abstract},
- volume = {11},
- year = {2010}
- }
- @article{Guo2007,
- abstract = {In this paper, we introduce a modified version of linear discriminant analysis, called the ``shrunken centroids regularized discriminant analysis'' (SCRDA). This method generalizes the idea of the ``nearest shrunken centroids'' (NSC) (Tibshirani and others, 2003) into the classical discriminant analysis. The SCRDA method is specially designed for classification problems in high dimension low sample size situations, for example, microarray data. Through both simulated data and real life data, it is shown that this method performs very well in multivariate classification problems, often outperforms the PAM method (using the NSC algorithm) and can be as competitive as the support vector machines classifiers. It is also suitable for feature elimination purpose and can be used as gene selection method. The open source R package for this method (named ``rda'') is available on CRAN (http://www.r-project.org) for download and testing.},
- author = {Guo, Yaqian and Hastie, Trevor and Tibshirani, Robert},
- doi = {10.1093/biostatistics/kxj035},
- file = {:Users/ryan/Documents/Mendeley Desktop/Guo, Hastie, Tibshirani - 2007 - Regularized linear discriminant analysis and its application in microarrays.pdf:pdf},
- issn = {1465-4644},
- journal = {Biostatistics},
- keywords = {Computer Simulation,DNA, Neoplasm,DNA, Neoplasm: genetics,Discriminant Analysis,Gene Expression Profiling,Gene Expression Profiling: methods,Humans,Linear Models,Neoplasms,Neoplasms: classification,Neoplasms: genetics,Oligonucleotide Array Sequence Analysis,Oligonucleotide Array Sequence Analysis: methods},
- month = jan,
- number = {1},
- pages = {86--100},
- pmid = {16603682},
- title = {Regularized linear discriminant analysis and its application in microarrays},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/16603682},
- volume = {8},
- year = {2007}
- }
- @article{Piccolo2012,
- abstract = {Gene-expression microarrays allow researchers to characterize biological phenomena in a high-throughput fashion but are subject to technological biases and inevitable variabilities that arise during sample collection and processing. Normalization techniques aim to correct such biases. Most existing methods require multiple samples to be processed in aggregate; consequently, each sample's output is influenced by other samples processed jointly. However, in personalized-medicine workflows, samples may arrive serially, so renormalizing all samples upon each new arrival would be impractical. We have developed Single Channel Array Normalization (SCAN), a single-sample technique that models the effects of probe-nucleotide composition on fluorescence intensity and corrects for such effects, dramatically increasing the signal-to-noise ratio within individual samples while decreasing variation across samples. In various benchmark comparisons, we show that SCAN performs as well as or better than competing methods yet has no dependence on external reference samples and can be applied to any single-channel microarray platform.},
- author = {Piccolo, Stephen R and Sun, Ying and Campbell, Joshua D and Lenburg, Marc E and Bild, Andrea H and Johnson, W Evan},
- doi = {10.1016/j.ygeno.2012.08.003},
- file = {:Users/ryan/Documents/Mendeley Desktop/Piccolo et al. - 2012 - A single-sample microarray normalization method to facilitate personalized-medicine workflows.pdf:pdf},
- issn = {1089-8646},
- journal = {Genomics},
- keywords = {Analysis of Variance,Fluorescence,Gene Expression Profiling,Gene Expression Profiling: methods,High-Throughput Screening Assays,High-Throughput Screening Assays: methods,Humans,Individualized Medicine,Individualized Medicine: methods,Oligonucleotide Array Sequence Analysis,Oligonucleotide Array Sequence Analysis: methods,Sample Size,Selection Bias,Signal-To-Noise Ratio,Workflow},
- month = dec,
- number = {6},
- pages = {337--344},
- pmid = {22959562},
- publisher = {Elsevier Inc.},
- title = {A single-sample microarray normalization method to facilitate personalized-medicine workflows},
- url = {http://www.ncbi.nlm.nih.gov/pubmed/22959562},
- volume = {100},
- year = {2012}
- }
- @article{McCall2010,
- abstract = {Robust multiarray analysis (RMA) is the most widely used preprocessing algorithm for Affymetrix and Nimblegen gene expression microarrays. RMA performs background correction, normalization, and summarization in a modular way. The last 2 steps require multiple arrays to be analyzed simultaneously. The ability to borrow information across samples provides RMA various advantages. For example, the summarization step fits a parametric model that accounts for probe effects, assumed to be fixed across arrays, and improves outlier detection. Residuals, obtained from the fitted model, permit the creation of useful quality metrics. However, the dependence on multiple arrays has 2 drawbacks: (1) RMA cannot be used in clinical settings where samples must be processed individually or in small batches and (2) data sets preprocessed separately are not comparable. We propose a preprocessing algorithm, frozen RMA (fRMA), which allows one to analyze microarrays individually or in small batches and then combine the data for analysis. This is accomplished by utilizing information from the large publicly available microarray databases. In particular, estimates of probe-specific effects and variances are precomputed and frozen. Then, with new data sets, these are used in concert with information from the new arrays to normalize and summarize the data. We find that fRMA is comparable to RMA when the data are analyzed as a single batch and outperforms RMA when analyzing multiple batches. The methods described here are implemented in the R package fRMA and are currently available for download from the software section of http://rafalab.jhsph.edu.},
- author = {McCall, Matthew N. and Bolstad, Benjamin M. and Irizarry, Rafael A.},
- doi = {10.1093/biostatistics/kxp059},
- file = {:Users/ryan/Documents/Mendeley Desktop/Hudson K. {\&} Remedios, C. et al. - 2010 - Frozen robust multiarray analysis (fRMA).pdf:pdf},
- issn = {1465-4644},
- journal = {Biostatistics},
- keywords = {Affymetrix,ArrayExpress,GEO,Microarray,Preprocessing,Single-array},
- month = apr,
- number = {2},
- pages = {242--253},
- pmid = {20097884},
- publisher = {Oxford University Press},
- title = {Frozen robust multiarray analysis ({fRMA})},
- url = {https://academic.oup.com/biostatistics/article-lookup/doi/10.1093/biostatistics/kxp059},
- volume = {11},
- year = {2010}
- }
- @article{LaMere2017,
- abstract = {The changes to the epigenetic landscape in response to Ag during CD4 T cell activation have not been well characterized. Although CD4 T cell subsets have been mapped globally for numerous epigenetic marks, little has been done to study their dynamics early after activation. We have studied changes to promoter H3K27me3 during activation of human naive and memory CD4 T cells. Our results show that these changes occur relatively early (1 d) after activation of naive and memory cells and that demethylation is the predominant change to H3K27me3 at this time point, reinforcing high expression of target genes. Additionally, inhibition of the H3K27 demethylase JMJD3 in naive CD4 T cells demonstrates how critically important molecules required for T cell differentiation, such as JAK2 and IL12RB2, are regulated by H3K27me3. Our results show that H3K27me3 is a dynamic and important epigenetic modification during CD4 T cell activation and that JMJD3-driven H3K27 demethylation is critical for CD4 T cell function.},
- author = {LaMere, Sarah A. and Thompson, Ryan C. and Meng, Xiangzhi and Komori, H. Kiyomi and Mark, Adam and Salomon, Daniel R.},
- doi = {10.4049/jimmunol.1700475},
- file = {:Users/ryan/Documents/Mendeley Desktop/LaMere et al. - 2017 - H3K27 Methylation Dynamics during CD4 T Cell Activation Regulation of JAKSTAT and IL12RB2 Expression by JMJD3(2).pdf:pdf},
- issn = {0022-1767},
- journal = {The Journal of Immunology},
- month = nov,
- number = {9},
- pages = {3158--3175},
- pmid = {28947543},
- title = {{H3K27} Methylation Dynamics during {CD4} {T} Cell Activation: Regulation of {JAK/STAT} and {IL12RB2} Expression by {JMJD3}},
- url = {http://www.jimmunol.org/lookup/doi/10.4049/jimmunol.1700475},
- volume = {199},
- year = {2017}
- }
- @article{Aitken2018,
- author = {Aitken, Sarah J and Ibarra-Soria, Ximena and Kentepozidou, Elissavet and Flicek, Paul and Feig, Christine and Marioni, John C and Odom, Duncan T},
- doi = {10.1186/s13059-018-1484-3},
- file = {:Users/ryan/Documents/Mendeley Desktop/Aitken et al. - 2018 - CTCF maintains regulatory homeostasis of cancer pathways.pdf:pdf},
- journal = {Genome Biology},
- keywords = {CTCF,Cancer,Chromatin architecture,Chromatin state,Hemizygosity,Transcription},
- pages = {1--17},
- publisher = {BioMed Central},
- title = {{CTCF} maintains regulatory homeostasis of cancer pathways},
- url = {https://genomebiology.biomedcentral.com/track/pdf/10.1186/s13059-018-1484-3},
- volume = {19},
- year = {2018}
- }
- @article{Statham2010,
- abstract = {SUMMARY: Epigenetics, the study of heritable somatic phenotypic changes not related to DNA sequence, has emerged as a critical component of the landscape of gene regulation. The epigenetic layers, such as DNA methylation, histone modifications and nuclear architecture are now being extensively studied in many cell types and disease settings. Few software tools exist to summarize and interpret these datasets. We have created a toolbox of procedures to interrogate and visualize epigenomic data (both array- and sequencing-based) and make available a software package for the cross-platform R language. AVAILABILITY: The package is freely available under LGPL from the R-Forge web site (http://repitools.r-forge.r-project.org/). CONTACT: mrobinson@wehi.edu.au.},
- author = {Statham, Aaron L. and Strbenac, Dario and Coolen, Marcel W. and Stirzaker, Clare and Clark, Susan J. and Robinson, Mark D.},
- doi = {10.1093/bioinformatics/btq247},
- file = {:Users/ryan/Documents/Mendeley Desktop/Statham et al. - 2010 - Repitools an R package for the analysis of enrichment-based epigenomic data.pdf:pdf},
- issn = {1367-4803},
- journal = {Bioinformatics},
- month = jul,
- number = {13},
- pages = {1662--1663},
- pmid = {20457667},
- title = {{Repitools}: an {R} package for the analysis of enrichment-based epigenomic data},
- url = {https://academic.oup.com/bioinformatics/article-lookup/doi/10.1093/bioinformatics/btq247},
- volume = {26},
- year = {2010}
- }
|