consistency-evaluate.R 2.4 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374
  1. #!/usr/bin/env Rscript
  2. # Script to check reproducibility of fRMA training process
  3. library(xlsx)
  4. library(frma)
  5. library(frmaTools)
  6. library(stringr)
  7. library(magrittr)
  8. library(plyr)
  9. library(affy)
  10. library(preprocessCore)
  11. library(ggplot2)
  12. library(proto)
  13. library(dplyr)
  14. load("consistency.rda")
  15. ## Select a random subset of 20 arrays from each tissue (Would rather
  16. ## use entire dataset but ENOMEM)
  17. set.seed(1986)
  18. norm.exprs <- lapply(names(vectors), function(ttype) {
  19. stab <- sample.tables[[ttype]] %>% sample_n(20)
  20. tsmsg("Reading 20 random arrays for ", ttype)
  21. affy <- ReadAffy(filenames=stab$Filename, sampleNames=rownames(stab))
  22. tsmsg("Normalizing with RMA for comparison")
  23. eset.rma <- rma(affy)
  24. rma.exprs <- eset.rma %>% exprs %>% as.vector
  25. tsmsg("Normalizing with 5 trains fRMA vector sets")
  26. esets.frma <- lapply(vectors[[ttype]], . %>% frma(affy, input.vecs=.))
  27. frma.exprs <- sapply(esets.frma, . %>% exprs %>% as.vector)
  28. data.frame(RMA=rma.exprs, fRMA=frma.exprs)
  29. }) %>% setNames(names(vectors))
  30. ## Save because the above takes a while
  31. save.image("consistency.rda")
  32. dir.create("fRMA_consistency_results", FALSE)
  33. ## Compute M/A data for all pairwise comparisons
  34. ma.data <- lapply(norm.exprs, function(normexprs) {
  35. normexprs %>% names %>% combn(2, simplify=FALSE) %>% ldply(. %>% {
  36. col1 <- .[1]
  37. col2 <- .[2]
  38. x1 <- normexprs[[col1]]
  39. x2 <- normexprs[[col2]]
  40. data.frame(Comparison = str_c(col1,".vs.",col2),
  41. x1=x1, x2=x2,
  42. M = x2 - x1,
  43. A = (x1+x2)/2)
  44. })
  45. })
  46. for (ttype in names(norm.exprs)) {
  47. madata <- ma.data[[ttype]]
  48. pdf(str_c("fRMA_consistency_results/MA Plots for ", ttype, ".pdf"))
  49. ## MA plot for every pair of normalizations
  50. ddply(madata, .(Comparison), . %$% {
  51. smoothScatter(x=A, y=M, nbin=512, main=Comparison[1], xlab="A", ylab="M")
  52. })
  53. dev.off()
  54. ## M boxplots & violin plots
  55. pdf(str_c("fRMA_consistency_results/M Boxplots for ", ttype, ".pdf"),
  56. width=8, height=12)
  57. p <- ggplot(madata) + aes(x=Comparison, y=M) +
  58. scale_x_discrete(limits = rev(levels(madata$Comparison))) +
  59. coord_flip()
  60. print(p + geom_boxplot(notch=TRUE, outlier.shape = NA) +
  61. ggtitle(str_c("Boxplots of M value distributions for ", ttype)))
  62. print(p + geom_violin(scale="width") +
  63. ggtitle(str_c("Violin plots of M value distributions for ", ttype)))
  64. dev.off()
  65. }