R version 2.9.2 (2009-08-24)
Copyright (C) 2009 The R Foundation for Statistical Computing
ISBN 3-900051-07-0

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

 - MKL libraries for acclerated math installed: see help (setMKLthreads)
 - ParallelR packages installed: see help (package='foreach')

Type 'revo()' to visit www.revolution-computing.com for the latest
REvolution R news, 'forum()' for the community forum, or 'readme()'
for release notes.

> png(filename="affy_quality_assement.r%02d.png")
> 
> ### Load the appropriate library for reading affy data, and inspect the data.
> library( 'affy' )
> soy.ab <- ReadAffy( 'geo_data/GSM209576.CEL.gz',
+                    'geo_data/GSM209585.CEL.gz',
+                    'geo_data/GSM209594.CEL.gz',
+                    'geo_data/GSM209577.CEL.gz',
+                    'geo_data/GSM209586.CEL.gz',
+                    'geo_data/GSM209595.CEL.gz',
+ 
+                    ## we have gz files which R can read in.
+                    compress=TRUE)
> 
> 
> 
> ## Inspect the loaded data. This will make a network connection first time
> ## around and can be slow.
> soy.ab
AffyBatch object
size of arrays=1164x1164 features (8 kb)
cdf=Soybean (61170 affyids)
number of samples=6
number of genes=61170
annotation=soybean
notes=  
> 
> ## Check out the names of the samples.
> sampleNames( soy.ab )
[1] "GSM209576.CEL.gz" "GSM209585.CEL.gz" "GSM209594.CEL.gz" "GSM209577.CEL.gz"
[5] "GSM209586.CEL.gz" "GSM209595.CEL.gz"
> 
> 
> 
> ## as the current sample names refer to the original files, we change this for
> ## something more, er, easy to remember.
> new.sampleNames <- c('hr.a3.12','hr.b3.12','hr.c3.12',
+                      'ts.a4.12','ts.b4.12','ts.c4.12')
> sampleNames(soy.ab) <- new.sampleNames
> 
> ## and check that it has worked
> sampleNames( soy.ab )
[1] "hr.a3.12" "hr.b3.12" "hr.c3.12" "ts.a4.12" "ts.b4.12" "ts.c4.12"
> 
> 
> 
> ##
> ## We are trying to do some subsetting because not all of the probes on the
> ## chip are from soy
> 
> 
> ## read in another data frame called Species.Affy.ID.
> ## this links species names to affy ids.
> Species.Affy.ID <- read.table('SpeciesAffyID.txt', header = T, sep = "")
> dim(Species.Affy.ID)
[1] 61170     2
> 
> 
> 
> load( 'SoybeanCutObjects.RData' )
> 
> tv.for.glycine.max <- Species.Affy.ID$species == 'Glycine max'
> table( tv.for.glycine.max )
tv.for.glycine.max
FALSE  TRUE 
23426 37744 
> listOutProbeSets <- Species.Affy.ID$affyID[ tv.for.glycine.max==FALSE ]
> 
> length( listOutProbeSets )
[1] 23426
> is.factor( listOutProbeSets )
[1] TRUE
> 
> ## Create a character vector for listOutProbeSets
> ## One way: rename listOutProbeSets as a character vector
> listOutProbeSets <- as.character(listOutProbeSets)
> 
> ## Confirm that listOutProbeSets is a character vector
> is.character(listOutProbeSets)
[1] TRUE
> 
> ## check object
> soy.ab
AffyBatch object
size of arrays=1164x1164 features (8 kb)
cdf=Soybean (61170 affyids)
number of samples=6
number of genes=61170
annotation=soybean
notes=  
> 
> 
> ## this is the bit which actually removes the stuff we are not intereste
> RemoveProbes(listOutProbes=NULL, listOutProbeSets, cdfpackagename, probepackagename)
> 
> ## Check that the object has less IDs now. There should be 37444.
> soy.ab
AffyBatch object
size of arrays=1164x1164 features (8 kb)
cdf=Soybean (37744 affyids)
number of samples=6
number of genes=37744
annotation=soybean
notes=  
> 
> # Start preparation for phenoData slot in AffyBatch object
> pd <- data.frame(population = c(1,1,1,2,2,2), replicate = c(1,2,3,1,2,3))
> 
> # Display contents of pd
> pd
  population replicate
1          1         1
2          1         2
3          1         3
4          2         1
5          2         2
6          2         3
> 
> # Assign the sampleNames(soy.ab) to the rownames of pd
> rownames(pd) <- sampleNames(soy.ab)
> 
> # Display contents of pd again, notice change in rownames
> pd
         population replicate
hr.a3.12          1         1
hr.b3.12          1         2
hr.c3.12          1         3
ts.a4.12          2         1
ts.b4.12          2         2
ts.c4.12          2         3
> 
> ## Continue preparation for phenoData slot
> metaData <- data.frame(labelDescription = c( 'population', 'replicate' ))
> 
> ## Establish new phenoData slot
> phenoData(soy.ab) <- new( 'AnnotatedDataFrame', data = pd, varMetadata = metaData)
> 
> ## Display pData(soy.ab)
> pData(soy.ab)
         population replicate
hr.a3.12          1         1
hr.b3.12          1         2
hr.c3.12          1         3
ts.a4.12          2         1
ts.b4.12          2         2
ts.c4.12          2         3
> 
> ## Display phenoData(soy.ab)
> phenoData(soy.ab)
An object of class "AnnotatedDataFrame"
  rowNames: hr.a3.12, hr.b3.12, ..., ts.c4.12  (6 total)
  varLabels and varMetadata description:
    population: population
    replicate: replicate
> 
> library( 'simpleaffy' )
> 
> ## TODO this section is broken
> 
> ## # Load (source in) function for quality control metrics revised
> ## # by W. Gregory Alvord and Octavio Quinones to work# with
> ## # soybeancdf
> source('soybean.mod.qc.affy.r')
> 
> ## # Execute soybean.mod.qc.affy() on object soy.ab
> ##soy.ab.qc <- soybean.mod.qc.affy(soy.ab)
> 
> ## # Assign qc metrics to objects
> ## avbg.soy.ab.qc <- avbg(soy.ab.qc)
> ## minbg.soy.ab.qc <- minbg(soy.ab.qc)
> ## maxbg.soy.ab.qc <- maxbg(soy.ab.qc)
> ## sfs.soy.ab.qc <- sfs(soy.ab.qc)
> ## percent.present.soy.ab.qc <- percent.present(soy.ab.qc)
> 
> ## # Display objects (here, rounded to 2 decimal places)
> ## round(minbg.soy.ab.qc ,2)
> ## round(maxbg.soy.ab.qc ,2)
> ## round(avbg.soy.ab.qc ,2)
> ## round(sfs.soy.ab.qc ,2)
> ## round(percent.present.soy.ab.qc,2)
> 
> 
> warnings()
Warning message:
xy2i is deprecated and will be removed in the next BioC release.
Use xy2indices in the affy package instead.
>