R version 2.9.2 (2009-08-24)
Copyright (C) 2009 The R Foundation for Statistical Computing
ISBN 3-900051-07-0

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

 - MKL libraries for acclerated math installed: see help (setMKLthreads)
 - ParallelR packages installed: see help (package='foreach')

Type 'revo()' to visit www.revolution-computing.com for the latest
REvolution R news, 'forum()' for the community forum, or 'readme()'
for release notes.

> png(filename="over_expressed.r%02d.png")
> palette.gray <- c(rep(gray(0:10/10), times = seq(1,41, by = 4)))
> 
> library('RColorBrewer')
> brewer.cols <- brewer.pal(6, 'Set1')
> 
> 
> ### Load the appropriate library for reading affy data, and inspect the data.
> library( 'affy' )
> soy.ab <- ReadAffy( 'geo_data/GSM209576.CEL.gz',
+                    'geo_data/GSM209585.CEL.gz',
+                    'geo_data/GSM209594.CEL.gz',
+                    'geo_data/GSM209577.CEL.gz',
+                    'geo_data/GSM209586.CEL.gz',
+                    'geo_data/GSM209595.CEL.gz',
+ 
+                    ## we have gz files which R can read in.
+                    compress=TRUE)
> 
> 
> 
> ## Inspect the loaded data. This will make a network connection first time
> ## around and can be slow.
> soy.ab
AffyBatch object
size of arrays=1164x1164 features (8 kb)
cdf=Soybean (61170 affyids)
number of samples=6
number of genes=61170
annotation=soybean
notes=  
> 
> ## Check out the names of the samples.
> sampleNames( soy.ab )
[1] "GSM209576.CEL.gz" "GSM209585.CEL.gz" "GSM209594.CEL.gz" "GSM209577.CEL.gz"
[5] "GSM209586.CEL.gz" "GSM209595.CEL.gz"
> 
> 
> 
> ## as the current sample names refer to the original files, we change this for
> ## something more, er, easy to remember.
> new.sampleNames <- c('hr.a3.12','hr.b3.12','hr.c3.12',
+                      'ts.a4.12','ts.b4.12','ts.c4.12')
> sampleNames(soy.ab) <- new.sampleNames
> 
> ## and check that it has worked
> sampleNames( soy.ab )
[1] "hr.a3.12" "hr.b3.12" "hr.c3.12" "ts.a4.12" "ts.b4.12" "ts.c4.12"
> 
> 
> 
> ##
> ## We are trying to do some subsetting because not all of the probes on the
> ## chip are from soy
> 
> 
> ## read in another data frame called Species.Affy.ID.
> ## this links species names to affy ids.
> Species.Affy.ID <- read.table('SpeciesAffyID.txt', header = T, sep = "")
> dim(Species.Affy.ID)
[1] 61170     2
> 
> 
> 
> load( 'SoybeanCutObjects.RData' )
> 
> tv.for.glycine.max <- Species.Affy.ID$species == 'Glycine max'
> table( tv.for.glycine.max )
tv.for.glycine.max
FALSE  TRUE 
23426 37744 
> listOutProbeSets <- Species.Affy.ID$affyID[ tv.for.glycine.max==FALSE ]
> 
> length( listOutProbeSets )
[1] 23426
> is.factor( listOutProbeSets )
[1] TRUE
> 
> ## Create a character vector for listOutProbeSets
> ## One way: rename listOutProbeSets as a character vector
> listOutProbeSets <- as.character(listOutProbeSets)
> 
> ## Confirm that listOutProbeSets is a character vector
> is.character(listOutProbeSets)
[1] TRUE
> 
> ## check object
> soy.ab
AffyBatch object
size of arrays=1164x1164 features (8 kb)
cdf=Soybean (61170 affyids)
number of samples=6
number of genes=61170
annotation=soybean
notes=  
> 
> 
> ## this is the bit which actually removes the stuff we are not intereste
> RemoveProbes(listOutProbes=NULL, listOutProbeSets, cdfpackagename, probepackagename)
> 
> ## Check that the object has less IDs now. There should be 37444.
> soy.ab
AffyBatch object
size of arrays=1164x1164 features (8 kb)
cdf=Soybean (37744 affyids)
number of samples=6
number of genes=37744
annotation=soybean
notes=  
> 
> # Start preparation for phenoData slot in AffyBatch object
> pd <- data.frame(population = c(1,1,1,2,2,2), replicate = c(1,2,3,1,2,3))
> 
> # Display contents of pd
> pd
  population replicate
1          1         1
2          1         2
3          1         3
4          2         1
5          2         2
6          2         3
> 
> # Assign the sampleNames(soy.ab) to the rownames of pd
> rownames(pd) <- sampleNames(soy.ab)
> 
> # Display contents of pd again, notice change in rownames
> pd
         population replicate
hr.a3.12          1         1
hr.b3.12          1         2
hr.c3.12          1         3
ts.a4.12          2         1
ts.b4.12          2         2
ts.c4.12          2         3
> 
> ## Continue preparation for phenoData slot
> metaData <- data.frame(labelDescription = c( 'population', 'replicate' ))
> 
> ## Establish new phenoData slot
> phenoData(soy.ab) <- new( 'AnnotatedDataFrame', data = pd, varMetadata = metaData)
> 
> ## Display pData(soy.ab)
> pData(soy.ab)
         population replicate
hr.a3.12          1         1
hr.b3.12          1         2
hr.c3.12          1         3
ts.a4.12          2         1
ts.b4.12          2         2
ts.c4.12          2         3
> 
> ## Display phenoData(soy.ab)
> phenoData(soy.ab)
An object of class "AnnotatedDataFrame"
  rowNames: hr.a3.12, hr.b3.12, ..., ts.c4.12  (6 total)
  varLabels and varMetadata description:
    population: population
    replicate: replicate
> 
> # Execute RMA procedure on soy.ab object; assign to object eset
> eset <- rma(soy.ab)
Background correcting
Normalizing
Calculating Expression
> 
> # Display eset
> eset
ExpressionSet (storageMode: lockedEnvironment)
assayData: 37744 features, 6 samples 
  element names: exprs 
phenoData
  sampleNames: hr.a3.12, hr.b3.12, ..., ts.c4.12  (6 total)
  varLabels and varMetadata description:
    population: population
    replicate: replicate
featureData
  featureNames: AFFX-BioB-3_at, AFFX-BioB-5_at, ..., soybean_rRNA_918_RC_at  (37744 total)
  fvarLabels and fvarMetadata description: none
experimentData: use 'experimentData(object)'
Annotation: soybean 
> 
> # Create an object that contains exprs(eset), named exprs.eset
> exprs.eset <- exprs(eset)
> 
> # Create index values – Index1 for Hawaii/Resistant,
> # Index2 for Taiwan/Susceptible
> Index1 <- 1:3
> Index2 <- 4:6
> 
> # Compute Difference vector for rowMeans between H/R and T/S
> Difference <- rowMeans(exprs.eset[,Index1]) -rowMeans(exprs.eset[,Index2])
> 
> # Also compute the Average for each row
> Average <- rowMeans(exprs.eset)
> 
> # Create data frame for matrix exprs.eset
> exprs.eset.df <- data.frame(exprs.eset)
> 
> 
> # Construct expression set boxplots following RMA
> par(oma = c(1,1,3,1))
> boxplot(exprs.eset.df, col = brewer.cols)
> mtext('Boxplots Soybean(Glycine max subset) RMA Gene Expression Data', side = 3, outer = T)
> 
> # Construct MA-Plot, Difference vs. Average
> plot(Average, Difference)
> lines(lowess(Average, Difference), col = 'red', lwd = 4)
> abline( h = -2)
> abline( h = 2)
> title(sub = "> lines(lowess(Average, Difference), col = 'red', lwd = 4)")
> mtext('MA-Plot, Difference vs. Average, Soybean (H/R & T/S)', outer = T, side = 3)
> 
> 
> 
> # Compute ordinary t statistics
> library('genefilter')
> tt <- rowttests(exprs.eset, factor(eset$population))
> names(tt)
[1] "statistic" "dm"        "p.value"  
> 
> # Check that length of tt$statistic is 37744
> length(tt$statistic)
[1] 37744
> 
> 
> warnings()
Warning message:
xy2i is deprecated and will be removed in the next BioC release.
Use xy2indices in the affy package instead.
>