1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374 |
- #!/usr/bin/env Rscript
- # Script to check reproducibility of fRMA training process
- library(xlsx)
- library(frma)
- library(frmaTools)
- library(stringr)
- library(magrittr)
- library(plyr)
- library(affy)
- library(preprocessCore)
- library(ggplot2)
- library(proto)
- library(dplyr)
- load("consistency.rda")
- ## Select a random subset of 20 arrays from each tissue (Would rather
- ## use entire dataset but ENOMEM)
- set.seed(1986)
- norm.exprs <- lapply(names(vectors), function(ttype) {
- stab <- sample.tables[[ttype]] %>% sample_n(20)
- tsmsg("Reading 20 random arrays for ", ttype)
- affy <- ReadAffy(filenames=stab$Filename, sampleNames=rownames(stab))
- tsmsg("Normalizing with RMA for comparison")
- eset.rma <- rma(affy)
- rma.exprs <- eset.rma %>% exprs %>% as.vector
- tsmsg("Normalizing with 5 trains fRMA vector sets")
- esets.frma <- lapply(vectors[[ttype]], . %>% frma(affy, input.vecs=.))
- frma.exprs <- sapply(esets.frma, . %>% exprs %>% as.vector)
- data.frame(RMA=rma.exprs, fRMA=frma.exprs)
- }) %>% setNames(names(vectors))
- ## Save because the above takes a while
- save.image("consistency.rda")
- dir.create("fRMA_consistency_results", FALSE)
- ## Compute M/A data for all pairwise comparisons
- ma.data <- lapply(norm.exprs, function(normexprs) {
- normexprs %>% names %>% combn(2, simplify=FALSE) %>% ldply(. %>% {
- col1 <- .[1]
- col2 <- .[2]
- x1 <- normexprs[[col1]]
- x2 <- normexprs[[col2]]
- data.frame(Comparison = str_c(col1,".vs.",col2),
- x1=x1, x2=x2,
- M = x2 - x1,
- A = (x1+x2)/2)
- })
- })
- for (ttype in names(norm.exprs)) {
- madata <- ma.data[[ttype]]
- pdf(str_c("fRMA_consistency_results/MA Plots for ", ttype, ".pdf"))
- ## MA plot for every pair of normalizations
- ddply(madata, .(Comparison), . %$% {
- smoothScatter(x=A, y=M, nbin=512, main=Comparison[1], xlab="A", ylab="M")
- })
- dev.off()
- ## M boxplots & violin plots
- pdf(str_c("fRMA_consistency_results/M Boxplots for ", ttype, ".pdf"),
- width=8, height=12)
- p <- ggplot(madata) + aes(x=Comparison, y=M) +
- scale_x_discrete(limits = rev(levels(madata$Comparison))) +
- coord_flip()
- print(p + geom_boxplot(notch=TRUE, outlier.shape = NA) +
- ggtitle(str_c("Boxplots of M value distributions for ", ttype)))
- print(p + geom_violin(scale="width") +
- ggtitle(str_c("Violin plots of M value distributions for ", ttype)))
- dev.off()
- }
|