##
## This source comes from Bolstad et al, "Quality Assessment of Affymetrix
## GeneChip Data" which is part of Bioinformatics and Computational Biology
## Solutions Using R and Bioconductor from Springer
##
## The script is organised in sections delimited by "##SPLIT:" markers. Later
## sections reuse objects created by earlier ones (Data, MLL.B, cols), so run
## them in order.
##

##SPLIT: install-lymph

## As before, we need to install some packages for this to work. However, this
## does not need to be run every time, so the installer is only fetched and
## invoked for packages that are not already available.
## NOTE(review): biocLite() has been superseded by BiocManager::install() in
## current Bioconductor releases -- confirm which installer this material
## should target before migrating.
required_pkgs <- c("ALLMLL", "AmpAffyExample")
is_installed <- vapply(required_pkgs, requireNamespace, logical(1),
                       quietly = TRUE)
if (!all(is_installed)) {
  source("http://bioconductor.org/biocLite.R")
  for (pkg in required_pkgs[!is_installed]) {
    biocLite(pkg)
  }
}

##SPLIT: prepare-lymph

## We need to load some libraries and do some preparation on the initial data
## sets. The affy library is a set of standard analysis routines, while ALLMLL
## contains the data reported at Mary E. Ross, Xiaodong Zhou, Guangchun Song,
## Sheila A. Shurtleff, Kevin Girtman, W. Kent Williams, Hsi-Che Liu, Rami
## Mahfouz, Susana C. Raimondi, Noel Lenny, Anami Patel, and James R. Downing
## (2003) Classification of pediatric acute lymphoblastic leukemia by gene
## expression profiling Blood 102: 2951-2959
library("affy")
library("ALLMLL")
data(MLL.B)

## Keep eight of the arrays (in this specific order) and relabel them "a".."h"
## so that the plots below stay readable.
Data <- MLL.B[, c(2, 1, 3:5, 14, 6, 13)]
sampleNames(Data) <- letters[1:8]

##SPLIT: visual-lymph

## Now that we have prepared the data, let's try looking at some of it. First,
## set up the visualisation. The palette repeats each of 11 gray levels an
## increasing number of times (1, 5, 9, ..., 41).
palette.gray <- rep(gray(0:10 / 10), times = seq(1, 41, by = 4))
par(mfrow = c(1, 2))

## Now view the data, one gray scale, the other on a log scale. You should see
## that this chip has a relatively strong spatial artifact, or as it is more
## technically known, a light bit down the side.
image(Data[, 1], transfo = function(x) x, col = palette.gray)
image(Data[, 1], col = palette.gray)

## Next we can consider the distribution of the intensities of the probes with
## a box plot, as well as probe level data.
## The practical outcome here is that the chip marked "f" is a bit suspicious,
## being a long way off the range of the other chips. Chip "a" has a bimodal
## distribution, which is probably a spatial artifact.
library("RColorBrewer")
cols <- brewer.pal(8, "Set1")
boxplot(Data, col = cols)
hist(Data, col = cols, lty = 1, xlab = "Log (base 2) intensities")
legend(12, 1, letters[1:8], lty = 1, col = cols)

## And scatter plots -- again, f is an outlier.
## NOTE(review): no outer margin (par(oma = ...)) is set anywhere in this
## script, so the outer = TRUE axis labels below may not be visible -- confirm.
par(mfrow = c(2, 4))
MAplot(Data, cex = 0.75)
mtext("M", 2, outer = TRUE)
mtext("A", 1, outer = TRUE)

##SPLIT: affy-quality-lymph

## These stats are some simple values that can be indicative of problems.
## simpleaffy calculates them all for you.
library("simpleaffy")
Data.qc <- qc(Data)

## This is average background -- they should all be about the same, f isn't.
avbg(Data.qc)

## Scale factors, should be within 3x each other. f and g look bad.
sfs(Data.qc)

## Are we missing lots of samples?
percent.present(Data.qc)

## 3/5 ratios...
ratios(Data.qc)[, 1:2]

##SPLIT: three-five-and-plm

## We use a different data set for this part. The original location is not
## attributed here, so I don't know where this data comes from.
library("AmpAffyExample")
data(AmpData)

## RNA Degradation -- unfortunately, this varies a bit from chip to chip, so
## there are fewer general rules about what is okay, and what is not.
sampleNames(AmpData) <- c("N1", "N2", "N3", "A1", "A2", "A3")
RNAdeg <- AffyRNAdeg(AmpData)
## The first three samples (N*) and the last three (A*) get one colour each.
plotAffyRNAdeg(RNAdeg, col = c(2, 2, 2, 3, 3, 3))
summaryAffyRNAdeg(RNAdeg)

## Probe level models can show up more subtle artifacts.
library("affyPLM")
Pset1 <- fitPLM(AmpData)
show(Pset1)

## This one shows a chip with a ring in the middle: the raw image of the third
## array next to its PLM weights, residuals and signed residuals.
par(mfrow = c(2, 2))
image(AmpData[, 3])
image(Pset1, type = "weights", which = 3)
image(Pset1, type = "resids", which = 3)
image(Pset1, type = "sign.resids", which = 3)

##SPLIT: more-plm

## And finally some more PLM data on the original data set.
## affyPLM is loaded again so that this section does not rely on the previous
## one having attached it.
library("affyPLM")

## NOTE(review): cols holds 8 colours, while this fit uses the full MLL.B
## object (at least 14 arrays, since column 14 is selected above) rather than
## the 8-array Data subset -- confirm that this is intended.
Pset2 <- fitPLM(MLL.B)
Mbox(Pset2, ylim = c(-1, 1), col = cols, names = NULL, main = "RLE")
boxplot(Pset2, ylim = c(0.95, 1.5), col = cols, names = NULL, main = "NUSE",
        outline = FALSE)

##SPLIT: end