R version 2.9.2 (2009-08-24)
Copyright (C) 2009 The R Foundation for Statistical Computing
ISBN 3-900051-07-0

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

 - MKL libraries for acclerated math installed: see help (setMKLthreads)
 - ParallelR packages installed: see help (package='foreach')

Type 'revo()' to visit www.revolution-computing.com for the latest
REvolution R news, 'forum()' for the community forum, or 'readme()'
for release notes.

> png(filename="basic_quality_images.r%02d.png")
> 
> ### Load the appropriate library for reading affy data, and inspect the data.
> library( 'affy' )
> soy.ab <- ReadAffy( 'geo_data/GSM209576.CEL.gz',
+                    'geo_data/GSM209585.CEL.gz',
+                    'geo_data/GSM209594.CEL.gz',
+                    'geo_data/GSM209577.CEL.gz',
+                    'geo_data/GSM209586.CEL.gz',
+                    'geo_data/GSM209595.CEL.gz',
+ 
+                    ## we have gz files which R can read in.
+                    compress=TRUE)
> 
> 
> 
> ## Inspect the loaded data. This will make a network connection first time
> ## around and can be slow.
> soy.ab
AffyBatch object
size of arrays=1164x1164 features (8 kb)
cdf=Soybean (61170 affyids)
number of samples=6
number of genes=61170
annotation=soybean
notes=  
> 
> ## Check out the names of the samples.
> sampleNames( soy.ab )
[1] "GSM209576.CEL.gz" "GSM209585.CEL.gz" "GSM209594.CEL.gz" "GSM209577.CEL.gz"
[5] "GSM209586.CEL.gz" "GSM209595.CEL.gz"
> 
> 
> 
> ## as the current sample names refer to the original files, we change this for
> ## something more, er, easy to remember.
> new.sampleNames <- c('hr.a3.12','hr.b3.12','hr.c3.12',
+                      'ts.a4.12','ts.b4.12','ts.c4.12')
> sampleNames(soy.ab) <- new.sampleNames
> 
> ## and check that it has worked
> sampleNames( soy.ab )
[1] "hr.a3.12" "hr.b3.12" "hr.c3.12" "ts.a4.12" "ts.b4.12" "ts.c4.12"
> 
> 
> 
> ##
> ## We are trying to do some subsetting because not all of the probes on the
> ## chip are from soy
> 
> 
> ## read in another data frame called Species.Affy.ID.
> ## this links species names to affy ids.
> Species.Affy.ID <- read.table('SpeciesAffyID.txt', header = T, sep = "")
> dim(Species.Affy.ID)
[1] 61170     2
> 
> 
> 
> load( 'SoybeanCutObjects.RData' )
> 
> tv.for.glycine.max <- Species.Affy.ID$species == 'Glycine max'
> table( tv.for.glycine.max )
tv.for.glycine.max
FALSE  TRUE 
23426 37744 
> listOutProbeSets <- Species.Affy.ID$affyID[ tv.for.glycine.max==FALSE ]
> 
> length( listOutProbeSets )
[1] 23426
> is.factor( listOutProbeSets )
[1] TRUE
> 
> ## Create a character vector for listOutProbeSets
> ## One way: rename listOutProbeSets as a character vector
> listOutProbeSets <- as.character(listOutProbeSets)
> 
> ## Confirm that listOutProbeSets is a character vector
> is.character(listOutProbeSets)
[1] TRUE
> 
> ## check object
> soy.ab
AffyBatch object
size of arrays=1164x1164 features (8 kb)
cdf=Soybean (61170 affyids)
number of samples=6
number of genes=61170
annotation=soybean
notes=  
> 
> 
> ## this is the bit which actually removes the stuff we are not intereste
> RemoveProbes(listOutProbes=NULL, listOutProbeSets, cdfpackagename, probepackagename)
> 
> ## Check that the object has less IDs now. There should be 37444.
> soy.ab
AffyBatch object
size of arrays=1164x1164 features (8 kb)
cdf=Soybean (37744 affyids)
number of samples=6
number of genes=37744
annotation=soybean
notes=  
> 
> # Start preparation for phenoData slot in AffyBatch object
> pd <- data.frame(population = c(1,1,1,2,2,2), replicate = c(1,2,3,1,2,3))
> 
> # Display contents of pd
> pd
  population replicate
1          1         1
2          1         2
3          1         3
4          2         1
5          2         2
6          2         3
> 
> # Assign the sampleNames(soy.ab) to the rownames of pd
> rownames(pd) <- sampleNames(soy.ab)
> 
> # Display contents of pd again, notice change in rownames
> pd
         population replicate
hr.a3.12          1         1
hr.b3.12          1         2
hr.c3.12          1         3
ts.a4.12          2         1
ts.b4.12          2         2
ts.c4.12          2         3
> 
> ## Continue preparation for phenoData slot
> metaData <- data.frame(labelDescription = c( 'population', 'replicate' ))
> 
> ## Establish new phenoData slot
> phenoData(soy.ab) <- new( 'AnnotatedDataFrame', data = pd, varMetadata = metaData)
> 
> ## Display pData(soy.ab)
> pData(soy.ab)
         population replicate
hr.a3.12          1         1
hr.b3.12          1         2
hr.c3.12          1         3
ts.a4.12          2         1
ts.b4.12          2         2
ts.c4.12          2         3
> 
> ## Display phenoData(soy.ab)
> phenoData(soy.ab)
An object of class "AnnotatedDataFrame"
  rowNames: hr.a3.12, hr.b3.12, ..., ts.c4.12  (6 total)
  varLabels and varMetadata description:
    population: population
    replicate: replicate
> 
> palette.gray <- c(rep(gray(0:10/10), times = seq(1,41, by = 4)))
> 
> library('RColorBrewer')
> brewer.cols <- brewer.pal(6, 'Set1')
> 
> 
> ## Now we want to look at the images of the chips. Basically, we are looking
> ## for something unusual here; one chip too bright for example.
> 
> ## Construct image plots for quality control assessment
> ## Note: that these plots appear dark; they are not log transformed
> par(oma = c(1,1,3,1))
> par(mfrow = c(2,3))
> 
> 
> palette.gray <- c(rep(gray(0:10/10), times = seq(1,41, by = 4)))
> image(soy.ab[,1], transfo = function(x) x, col = palette.gray)
> image(soy.ab[,2], transfo = function(x) x, col = palette.gray)
> image(soy.ab[,3], transfo = function(x) x, col = palette.gray)
> image(soy.ab[,4], transfo = function(x) x, col = palette.gray)
> image(soy.ab[,5], transfo = function(x) x, col = palette.gray)
> image(soy.ab[,6], transfo = function(x) x, col = palette.gray)
> mtext ("> image(soy.ab[,i=1:6], transfo = function(x) x, col = palette.gray)", side = 3, outer = T, cex = .8)
> 
> 
> ## Construct individual image plots using log intensities
> ## Note: that these plots appear lighter; they are log transformed
> ##
> ## This allows us to see a single microarray chip.
> par(mfrow = c(1,1))
> 
> palette.gray <- c(rep(gray(0:10/10), times = seq(1,41, by = 4)))
> image(soy.ab[,1],  col = palette.gray)
> title(sub = "> image(soy.ab[,1],  col = palette.gray)")
> 
> ## Repeat above commands with .. soy.ab[,i], i = 2-6
> 
> ## Display all six log-transformed image plots in one plot
> par(oma = c(3,1,3,1))
> par(mfrow = c(2,3))
> image(soy.ab[,1],  col = palette.gray)
> image(soy.ab[,2],  col = palette.gray)
> image(soy.ab[,3],  col = palette.gray)
> image(soy.ab[,4],  col = palette.gray)
> image(soy.ab[,5],  col = palette.gray)
> image(soy.ab[,6],  col = palette.gray)
> mtext("Image Plots - Hawaii/Resistant vs. Taiwan/Susceptible", side = 3, outer = T)
> mtext("Top Row: Hawaii/Resistant - Bottom Row: Taiwan/Susceptible", side = 1, outer = T)
> 
> ## Reset to single screen.
> par(mfrow = c(1,1))
> 
> 
> warnings()
Warning message:
xy2i is deprecated and will be removed in the next BioC release.
Use xy2indices in the affy package instead.
>