R version 2.9.2 (2009-08-24)
Copyright (C) 2009 The R Foundation for Statistical Computing
ISBN 3-900051-07-0

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

 - MKL libraries for acclerated math installed: see help (setMKLthreads)
 - ParallelR packages installed: see help (package='foreach')

Type 'revo()' to visit www.revolution-computing.com for the latest
REvolution R news, 'forum()' for the community forum, or 'readme()'
for release notes.

> png(filename="lymphoblastic.r%02d.png")
> ##
> ##
> ## This source comes from Bolstad et al, "Quality Assessment of Affymetric
> ## GeneChip Data" which is part of Bioinformatics and Computational Biology
> ## Solutions Using R and Bioconductor from springer
> ##
> ##
> 
> 
> ##SPLIT: install-lymph
> 
> ## As before, we need to install some packages for this to work. However, this
> ## does not need to be run every time, so this may be commented out. 
> source( "http://bioconductor.org/biocLite.R")
> biocLite( "ALLMLL" )
Using R version 2.9.2, biocinstall version 2.4.13.
Installing Bioconductor version 2.4 packages:
[1] "ALLMLL"
Please wait...

 >>> Building/Updating help pages for package 'ALLMLL'
     Formats: text html latex example 
  MLL                               text    html    latex

The downloaded packages are in
	‘/tmp/Rtmp1qX8dK/downloaded_packages’
> biocLite( "AmpAffyExample" )
Using R version 2.9.2, biocinstall version 2.4.13.
Installing Bioconductor version 2.4 packages:
[1] "AmpAffyExample"
Please wait...

 >>> Building/Updating help pages for package 'AmpAffyExample'
     Formats: text html latex example 
  AmpData                           text    html    latex

The downloaded packages are in
	‘/tmp/Rtmp1qX8dK/downloaded_packages’
> 
> 
> 
> ##SPLIT: prepare-lymph
> 
> ## we need to load some libraries and do some preparation on the initial data
> ## sets. the affy library is a set of standard analysis routines. while ALLMLL
> ## contains the data reported at Mary E. Ross, Xiaodong Zhou, Guangchun Song,
> ## Sheila A. Shurtleff, Kevin Girtman, W. Kent Williams, Hsi-Che Liu, Rami
> ## Mahfouz, Susana C. Raimondi, Noel Lenny, Anami Patel, and James R. Downing
> ## (2003) Classification of pediatric acute lymphoblastic leukemia by gene
> ## expression profiling Blood 102: 2951-2959
> library( "affy" )
> library( "ALLMLL" )
> 
> 
> data( MLL.B )
> Data <- MLL.B[, c(2,1,3:5,14,6,13)]
> sampleNames(Data) <- letters[1:8]
> 
> ##SPLIT: visual-lymph
> 
> ## Now that we have prepared the data, let's try looking at some of it. First,
> ## set up the visualisation
> palette.gray <- c(rep(gray(0:10/10), times = seq(1,41,by=4)))
> par(mfrow=c(1,2))
> ## now view the data, one gray scale, the other on a log scale. You should see
> ## that this chip has a relatively strong spatial artifact, or as it is more
> ## technically known, a light bit down the side.
> image(Data[,1], transfo=function(x) x, col=palette.gray)
> image(Data[,1], col = palette.gray)
> 
> 
> ## Next we can consider the distribution of the intensities of the probes with
> ## a box plot, as well as probe level data.
> 
> ## The practical outcome here is that the chip marked "f" is a bit suspicious,
> ## being a long way of the range of the other chips. Chip "a" has a biomodal
> ## distribution, which is probably a spatial artifact.
> library( "RColorBrewer" )
> cols <- brewer.pal(8, "Set1")
> boxplot(Data, col = cols)
> 
> hist(Data, col=cols, lty = 1, xlab="Log (base 2) intensities")
> legend(12, 1, letters[1:8],lty=1,col=cols)
> 
> ## and scatter plots -- again, f, is an outlier. 
> par(mfrow = c(2,4))
> MAplot(Data,cex=0.75)
> mtext( "M", 2, outer=TRUE)
> mtext( "A", 1, outer=TRUE)
> 
> 
> ##SPLIT: affy-quality-lymph
> 
> ## these stats are some simple values that can be indicative of problems.
> ## simpleaffy calculates them all for you
> library( "simpleaffy" )
> Data.qc <- qc(Data)
> ## this is average background -- they should all be about the same, f isn't
> avbg(Data.qc)
        a         b         c         d         e         f         g         h 
 68.18425  67.34494  42.12819  61.31731  53.64844 128.41264  49.39112  49.25758 
> ## scale factors, should be within 3x each other. f and g look bad
> sfs(Data.qc)
[1]  9.765986  4.905489 10.489529  7.053323  7.561613  2.475224 13.531238
[8]  8.089458
> ## are we missing lots of samples
> percent.present(Data.qc)
a.present b.present c.present d.present e.present f.present g.present h.present 
 21.65158  26.53124  25.58181  23.53279  23.35615  25.25061  17.96423  24.40274 
> ## 3/5 ratios...
> ratios(Data.qc)[,1:2]
  actin3/actin5 actin3/actinM
a     0.9697007    0.12291462
b     0.3235390   -0.19439139
c     0.4661537   -0.14331962
d     1.2567868    0.15861351
e     0.6036608    0.02095918
f     0.6715308    0.02916033
g     0.3798125   -0.15918419
h     0.4850414   -0.17911051
> 
> 
> ##SPLIT: three-five-and-plm
> 
> ## We use a different data set for this part. The original location is not
> ## attributed here, so I don't know where this data comes from.
> library( "AmpAffyExample" )
> data( AmpData )
> 
> ## RNA Degregation -- unfortunately, this varies a bit from chip to chip, so
> ## there are fewer general rules about what is okay, and what is not.
> sampleNames(AmpData) <- c("N1", "N2", "N3", "A1", "A2", "A3" )
> RNAdeg <- AffyRNAdeg(AmpData)
> 
> plotAffyRNAdeg(RNAdeg,col=c(2,2,2,3,3,3))
> summaryAffyRNAdeg(RNAdeg)
            N1       N2       N3       A1       A2       A3
slope  2.3e+00 2.21e+00 2.56e+00 5.38e+00 4.32e+00 5.68e+00
pvalue 3.9e-08 8.13e-08 3.12e-09 9.03e-12 4.91e-10 5.35e-12
> 
> 
> ## probe level models can show up more subtle artifacts
> library( "affyPLM" )
> Pset1 <- fitPLM( AmpData )
> show( Pset1 )
Probe level linear model (PLMset) object
size of arrays=712x712
cdf=HG-U133A (22283 probeset ids)
number of samples=6
number of probesets=22283
number of chip level parameters for each probeset=6
annotation=hgu133a
PLMset settings
Creating function: fitPLM 
Preprocessing
Background Correction=TRUE Method= RMA.2
Normalization=TRUE Method= quantile

Model/Summarization
$constraint.type
          default 
"contr.treatment" 

$variable.type
 default 
"factor" 

$model.param
$model.param$trans.fn
[1] "log2"

$model.param$se.type
[1] 4

$model.param$psi.type
[1] 0

$model.param$psi.k
[1] 1.345

$model.param$max.its
[1] 20

$model.param$init.method
[1] "ls"

$model.param$weights.chip
NULL

$model.param$weights.probe
NULL


Output Settings
$weights
[1] TRUE

$residuals
[1] TRUE

$varcov
[1] "none"

$resid.SE
[1] TRUE

> 
> ## this one shows a chip with a ring in the middle. 
> par(mfrow = c(2,2))
> image(AmpData[,3])
> image(Pset1,type="weights",which=3)
> image(Pset1,type="resids",which=3)
> image(Pset1,type="sign.resids",which=3)
> 
> 
> ##SPLIT: more-plm
> ## and finally some more PLM data on the original data set. 
> library( "affyPLM" )
> Pset2 <- fitPLM(MLL.B)
> Mbox( Pset2, ylim=c(-1,1), col = cols,
+      names = NULL, main="RLE")
> 
> boxplot(Pset2, ylim=c(0.95,1.5), col=cols,
+         names=NULL,main="NUSE",outline=FALSE)
> 
> 
> ##SPLIT: end
> warnings()
NULL
>