##
##
## This source comes from Bolstad et al., "Quality Assessment of Affymetrix
## GeneChip Data", which is part of "Bioinformatics and Computational Biology
## Solutions Using R and Bioconductor", published by Springer.
##
##


##SPLIT: install-lymph

## As before, some packages must be installed for this to work. Only packages
## that are not already installed are fetched, so this section is safe to run
## every time and no longer needs to be commented out after the first run.
bioc_pkgs <- c("ALLMLL", "AmpAffyExample")
bioc_missing <- bioc_pkgs[!vapply(bioc_pkgs, requireNamespace, logical(1),
                                  quietly = TRUE)]
if (length(bioc_missing) > 0) {
  if (getRversion() >= "3.5.0") {
    ## biocLite() has been retired; BiocManager is the supported Bioconductor
    ## installer on current versions of R.
    if (!requireNamespace("BiocManager", quietly = TRUE)) {
      install.packages("BiocManager")
    }
    BiocManager::install(bioc_missing)
  } else {
    ## Legacy installer for old R versions. It is fetched over HTTPS because
    ## source() executes the downloaded script immediately.
    source("https://bioconductor.org/biocLite.R")
    biocLite(bioc_missing)
  }
}


##SPLIT: prepare-lymph

## Load the libraries and prepare the initial data set. "affy" is a set of
## standard analysis routines, while "ALLMLL" contains the data reported in:
##   Mary E. Ross, Xiaodong Zhou, Guangchun Song, Sheila A. Shurtleff, Kevin
##   Girtman, W. Kent Williams, Hsi-Che Liu, Rami Mahfouz, Susana C. Raimondi,
##   Noel Lenny, Anami Patel, and James R. Downing (2003) Classification of
##   pediatric acute lymphoblastic leukemia by gene expression profiling.
##   Blood 102: 2951-2959
library(affy)
library(ALLMLL)

## Pull MLL.B into the workspace, keep eight of its chips (in the order given
## by the column indices) and relabel them "a" to "h". Later sections refer to
## the chips by these letters.
data("MLL.B")
Data <- MLL.B[, c(2, 1, 3, 4, 5, 14, 6, 13)]
sampleNames(Data) <- head(letters, 8)

##SPLIT: visual-lymph

## Now that the data are prepared, let's look at some of them. First build the
## grey ramp used for the chip images: 11 grey levels from black to white,
## where each level is repeated four more times than the previous one
## (1, 5, 9, ..., 41 entries).
palette.gray <- rep(
  gray((0:10) / 10),
  times = seq(from = 1, to = 41, by = 4)
)
par(mfrow = c(1, 2))
## View the first chip ("a") twice, side by side: once with the intensities
## passed through unchanged (plain grey scale), and once with image()'s default
## transform (log scale). You should see that this chip has a relatively strong
## spatial artifact or, as it is more technically known, a light bit down the
## side.
image(Data[, 1], transfo = identity, col = palette.gray)
image(Data[, 1], col = palette.gray)


## Next, consider the distribution of the probe intensities on each chip,
## first as a box plot and then from the probe-level data with hist().
##
## The practical outcome is that the chip marked "f" is a bit suspicious, lying
## a long way outside the range of the other chips. Chip "a" has a bimodal
## distribution, which is probably a spatial artifact.
library(RColorBrewer)

## One colour per chip; `cols` is reused by later sections of this script.
cols <- brewer.pal(n = 8, name = "Set1")
boxplot(Data, col = cols)

hist(Data, col = cols, lty = 1, xlab = "Log (base 2) intensities")
legend(x = 12, y = 1, legend = letters[1:8], lty = 1, col = cols)

## MA scatter plots, one panel per chip on a 2 x 4 grid -- again, "f" is an
## outlier.
##
## The shared axis labels are written into the outer margin of the page. The
## default outer margin (`oma`) is zero lines wide, which leaves no room for
## that text, so reserve two lines at the bottom and on the left before
## plotting and put the previous outer margin back afterwards. The 2 x 4
## layout itself is left in place.
old_par <- par(mfrow = c(2, 4), oma = c(2, 2, 0, 0))
MAplot(Data, cex = 0.75)
mtext("M", side = 2, outer = TRUE)
mtext("A", side = 1, outer = TRUE)
par(oma = old_par$oma)


##SPLIT: affy-quality-lymph

## A handful of simple per-chip statistics can be indicative of problems;
## simpleaffy's qc() calculates them all in one go.
library(simpleaffy)
Data.qc <- qc(Data)

## Average background: the chips should all be about the same -- "f" isn't.
avbg(Data.qc)

## Scale factors: these should be within 3x of each other -- "f" and "g" look
## bad.
sfs(Data.qc)

## Percent present: are we missing lots of samples?
percent.present(Data.qc)

## 3'/5' ratios (first two columns only).
ratios(Data.qc)[, c(1, 2)]


##SPLIT: three-five-and-plm

## We use a different data set for this part. The original location is not
## attributed here, so I don't know where this data comes from.
library("AmpAffyExample")
data(AmpData)

## RNA degradation -- unfortunately, this varies a bit from chip to chip, so
## there are fewer general rules about what is okay and what is not.
sampleNames(AmpData) <- c("N1", "N2", "N3", "A1", "A2", "A3")
RNAdeg <- AffyRNAdeg(AmpData)

## Colour the three "N" chips with colour 2 and the three "A" chips with
## colour 3. The colour argument of plotAffyRNAdeg() is called `cols`; it is
## spelled out in full here rather than relying on partial argument matching
## of `col`.
plotAffyRNAdeg(RNAdeg, cols = c(2, 2, 2, 3, 3, 3))
summaryAffyRNAdeg(RNAdeg)


## Probe-level models (PLMs) can show up more subtle artifacts than the raw
## chip image does.
library(affyPLM)
Pset1 <- fitPLM(AmpData)
show(Pset1)

## The third chip has a ring in the middle. Show it on a 2 x 2 page: the raw
## chip image first, followed by the PLM weights, residuals and signed
## residuals for that same chip.
par(mfrow = c(2, 2))
image(AmpData[, 3])
for (plm_view in c("weights", "resids", "sign.resids")) {
  image(Pset1, type = plm_view, which = 3)
}


##SPLIT: more-plm
## Finally, some more PLM output, this time for the original data set. This
## fits the whole of MLL.B and reuses the `cols` palette created for the
## box plots earlier in this script.
library(affyPLM)
Pset2 <- fitPLM(MLL.B)

## Relative log expression (RLE) box plots.
Mbox(
  Pset2,
  ylim = c(-1, 1),
  col = cols,
  names = NULL,
  main = "RLE"
)

## Normalised unscaled standard error (NUSE) box plots, outliers not drawn.
boxplot(
  Pset2,
  ylim = c(0.95, 1.5),
  col = cols,
  names = NULL,
  main = "NUSE",
  outline = FALSE
)


##SPLIT: end