#!/usr/bin/env Rscript
# Script to check reproducibility of fRMA training process
library(xlsx)
library(frma)
library(frmaTools)
library(stringr)
library(magrittr)
library(plyr)
library(affy)
library(preprocessCore)
library(ggplot2)
library(proto)
library(dplyr)
load("consistency.rda")
## Select a random subset of 20 arrays from each tissue (Would rather
## use entire dataset but ENOMEM)
set.seed(1986)
norm.exprs <- lapply(names(vectors), function(ttype) {
stab <- sample.tables[[ttype]] %>% sample_n(20)
tsmsg("Reading 20 random arrays for ", ttype)
affy <- ReadAffy(filenames=stab$Filename, sampleNames=rownames(stab))
tsmsg("Normalizing with RMA for comparison")
eset.rma <- rma(affy)
rma.exprs <- eset.rma %>% exprs %>% as.vector
tsmsg("Normalizing with 5 trains fRMA vector sets")
esets.frma <- lapply(vectors[[ttype]], . %>% frma(affy, input.vecs=.))
frma.exprs <- sapply(esets.frma, . %>% exprs %>% as.vector)
data.frame(RMA=rma.exprs, fRMA=frma.exprs)
}) %>% setNames(names(vectors))
## Save because the above takes a while
save.image("consistency.rda")
dir.create("fRMA_consistency_results", FALSE)
## Compute M/A data for all pairwise comparisons
ma.data <- lapply(norm.exprs, function(normexprs) {
normexprs %>% names %>% combn(2, simplify=FALSE) %>% ldply(. %>% {
col1 <- .[1]
col2 <- .[2]
x1 <- normexprs[[col1]]
x2 <- normexprs[[col2]]
data.frame(Comparison = str_c(col1,".vs.",col2),
x1=x1, x2=x2,
M = x2 - x1,
A = (x1+x2)/2)
})
})
for (ttype in names(norm.exprs)) {
madata <- ma.data[[ttype]]
pdf(str_c("fRMA_consistency_results/MA Plots for ", ttype, ".pdf"))
## MA plot for every pair of normalizations
ddply(madata, .(Comparison), . %$% {
smoothScatter(x=A, y=M, nbin=512, main=Comparison[1], xlab="A", ylab="M")
})
dev.off()
## M boxplots & violin plots
pdf(str_c("fRMA_consistency_results/M Boxplots for ", ttype, ".pdf"),
width=8, height=12)
p <- ggplot(madata) + aes(x=Comparison, y=M) +
scale_x_discrete(limits = rev(levels(madata$Comparison))) +
coord_flip()
print(p + geom_boxplot(notch=TRUE, outlier.shape = NA) +
ggtitle(str_c("Boxplots of M value distributions for ", ttype)))
print(p + geom_violin(scale="width") +
ggtitle(str_c("Violin plots of M value distributions for ", ttype)))
dev.off()
}