R version 2.9.2 (2009-08-24)
Copyright (C) 2009 The R Foundation for Statistical Computing
ISBN 3-900051-07-0

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

 - MKL libraries for acclerated math installed: see help (setMKLthreads)
 - ParallelR packages installed: see help (package='foreach')

Type 'revo()' to visit www.revolution-computing.com for the latest
REvolution R news, 'forum()' for the community forum, or 'readme()'
for release notes.

> png(filename="heatmaps.r%02d.png")
> palette.gray <- c(rep(gray(0:10/10), times = seq(1,41, by = 4)))
> 
> library('RColorBrewer')
> brewer.cols <- brewer.pal(6, 'Set1')
> 
> 
> ### Load the appropriate library for reading affy data, and inspect the data.
> library( 'affy' )
> soy.ab <- ReadAffy( 'geo_data/GSM209576.CEL.gz',
+                    'geo_data/GSM209585.CEL.gz',
+                    'geo_data/GSM209594.CEL.gz',
+                    'geo_data/GSM209577.CEL.gz',
+                    'geo_data/GSM209586.CEL.gz',
+                    'geo_data/GSM209595.CEL.gz',
+ 
+                    ## we have gz files which R can read in.
+                    compress=TRUE)
> 
> 
> 
> ## Inspect the loaded data. This will make a network connection first time
> ## around and can be slow.
> soy.ab
AffyBatch object
size of arrays=1164x1164 features (8 kb)
cdf=Soybean (61170 affyids)
number of samples=6
number of genes=61170
annotation=soybean
notes=  
> 
> ## Check out the names of the samples.
> sampleNames( soy.ab )
[1] "GSM209576.CEL.gz" "GSM209585.CEL.gz" "GSM209594.CEL.gz" "GSM209577.CEL.gz"
[5] "GSM209586.CEL.gz" "GSM209595.CEL.gz"
> 
> 
> 
> ## as the current sample names refer to the original files, we change this for
> ## something more, er, easy to remember.
> new.sampleNames <- c('hr.a3.12','hr.b3.12','hr.c3.12',
+                      'ts.a4.12','ts.b4.12','ts.c4.12')
> sampleNames(soy.ab) <- new.sampleNames
> 
> ## and check that it has worked
> sampleNames( soy.ab )
[1] "hr.a3.12" "hr.b3.12" "hr.c3.12" "ts.a4.12" "ts.b4.12" "ts.c4.12"
> 
> 
> 
> ##
> ## We are trying to do some subsetting because not all of the probes on the
> ## chip are from soy
> 
> 
> ## read in another data frame called Species.Affy.ID.
> ## this links species names to affy ids.
> Species.Affy.ID <- read.table('SpeciesAffyID.txt', header = T, sep = "")
> dim(Species.Affy.ID)
[1] 61170     2
> 
> 
> 
> load( 'SoybeanCutObjects.RData' )
> 
> tv.for.glycine.max <- Species.Affy.ID$species == 'Glycine max'
> table( tv.for.glycine.max )
tv.for.glycine.max
FALSE  TRUE 
23426 37744 
> listOutProbeSets <- Species.Affy.ID$affyID[ tv.for.glycine.max==FALSE ]
> 
> length( listOutProbeSets )
[1] 23426
> is.factor( listOutProbeSets )
[1] TRUE
> 
> ## Create a character vector for listOutProbeSets
> ## One way: rename listOutProbeSets as a character vector
> listOutProbeSets <- as.character(listOutProbeSets)
> 
> ## Confirm that listOutProbeSets is a character vector
> is.character(listOutProbeSets)
[1] TRUE
> 
> ## check object
> soy.ab
AffyBatch object
size of arrays=1164x1164 features (8 kb)
cdf=Soybean (61170 affyids)
number of samples=6
number of genes=61170
annotation=soybean
notes=  
> 
> 
> ## this is the bit which actually removes the stuff we are not intereste
> RemoveProbes(listOutProbes=NULL, listOutProbeSets, cdfpackagename, probepackagename)
> 
> ## Check that the object has less IDs now. There should be 37444.
> soy.ab
AffyBatch object
size of arrays=1164x1164 features (8 kb)
cdf=Soybean (37744 affyids)
number of samples=6
number of genes=37744
annotation=soybean
notes=  
> 
> # Start preparation for phenoData slot in AffyBatch object
> pd <- data.frame(population = c(1,1,1,2,2,2), replicate = c(1,2,3,1,2,3))
> 
> # Display contents of pd
> pd
  population replicate
1          1         1
2          1         2
3          1         3
4          2         1
5          2         2
6          2         3
> 
> # Assign the sampleNames(soy.ab) to the rownames of pd
> rownames(pd) <- sampleNames(soy.ab)
> 
> # Display contents of pd again, notice change in rownames
> pd
         population replicate
hr.a3.12          1         1
hr.b3.12          1         2
hr.c3.12          1         3
ts.a4.12          2         1
ts.b4.12          2         2
ts.c4.12          2         3
> 
> ## Continue preparation for phenoData slot
> metaData <- data.frame(labelDescription = c( 'population', 'replicate' ))
> 
> ## Establish new phenoData slot
> phenoData(soy.ab) <- new( 'AnnotatedDataFrame', data = pd, varMetadata = metaData)
> 
> ## Display pData(soy.ab)
> pData(soy.ab)
         population replicate
hr.a3.12          1         1
hr.b3.12          1         2
hr.c3.12          1         3
ts.a4.12          2         1
ts.b4.12          2         2
ts.c4.12          2         3
> 
> ## Display phenoData(soy.ab)
> phenoData(soy.ab)
An object of class "AnnotatedDataFrame"
  rowNames: hr.a3.12, hr.b3.12, ..., ts.c4.12  (6 total)
  varLabels and varMetadata description:
    population: population
    replicate: replicate
> 
> # Execute RMA procedure on soy.ab object; assign to object eset
> eset <- rma(soy.ab)
Background correcting
Normalizing
Calculating Expression
> 
> # Display eset
> eset
ExpressionSet (storageMode: lockedEnvironment)
assayData: 37744 features, 6 samples 
  element names: exprs 
phenoData
  sampleNames: hr.a3.12, hr.b3.12, ..., ts.c4.12  (6 total)
  varLabels and varMetadata description:
    population: population
    replicate: replicate
featureData
  featureNames: AFFX-BioB-3_at, AFFX-BioB-5_at, ..., soybean_rRNA_918_RC_at  (37744 total)
  fvarLabels and fvarMetadata description: none
experimentData: use 'experimentData(object)'
Annotation: soybean 
> 
> # Create an object that contains exprs(eset), named exprs.eset
> exprs.eset <- exprs(eset)
> 
> # Create index values – Index1 for Hawaii/Resistant,
> # Index2 for Taiwan/Susceptible
> Index1 <- 1:3
> Index2 <- 4:6
> 
> # Compute Difference vector for rowMeans between H/R and T/S
> Difference <- rowMeans(exprs.eset[,Index1]) -rowMeans(exprs.eset[,Index2])
> 
> # Also compute the Average for each row
> Average <- rowMeans(exprs.eset)
> 
> # Create data frame for matrix exprs.eset
> exprs.eset.df <- data.frame(exprs.eset)
> 
> 
> # Construct expression set boxplots following RMA
> par(oma = c(1,1,3,1))
> boxplot(exprs.eset.df, col = brewer.cols)
> mtext('Boxplots Soybean(Glycine max subset) RMA Gene Expression Data', side = 3, outer = T)
> 
> # Construct MA-Plot, Difference vs. Average
> plot(Average, Difference)
> lines(lowess(Average, Difference), col = 'red', lwd = 4)
> abline( h = -2)
> abline( h = 2)
> title(sub = "> lines(lowess(Average, Difference), col = 'red', lwd = 4)")
> mtext('MA-Plot, Difference vs. Average, Soybean (H/R & T/S)', outer = T, side = 3)
> 
> 
> 
> # Compute ordinary t statistics
> library('genefilter')
> tt <- rowttests(exprs.eset, factor(eset$population))
> names(tt)
[1] "statistic" "dm"        "p.value"  
> 
> # Check that length of tt$statistic is 37744
> length(tt$statistic)
[1] 37744
> 
> 
> ## Start the construction of the Volcano Plot
> ## Construct the lod scores...
> lod <- -log10(tt$p.value)
> o1 <- order(abs(Difference), decreasing = TRUE)[1:50]
> o2 <- order(abs(tt$statistic), decreasing = TRUE)[1:50]
> 
> # Construct union and intersection of sets
> o <- union(o1, o2)
> i <- intersect(o1, o2)
> 
> # Display i; note, we have 4 intersects
> i
[1] 12981  5534 17528 11942
> 
> library(limma)
> 
> # Construct population.groups for model design
> population.groups <- factor(c(rep('Taiwan/Susceptible',3), rep('Hawaii/Resistant',3)))
> design <- model.matrix( ~ population.groups )
> 
> # Display design
> design
  (Intercept) population.groupsTaiwan/Susceptible
1           1                                   1
2           1                                   1
3           1                                   1
4           1                                   0
5           1                                   0
6           1                                   0
attr(,"assign")
[1] 0 1
attr(,"contrasts")
attr(,"contrasts")$population.groups
[1] "contr.treatment"

> 
> # Fit linear model
> fit <- lmFit(eset, design)
> 
> # Check the dimension of fit$coeff; should be 37744 x 2
> dim(fit$coeff)
[1] 37744     2
> 
> # Execute eBayes to obtain attenuated t statistics
> fit.eBayes <- eBayes(fit)
> 
> # Check dimension of fit.eBayes$t; should be 37744 x 2
> dim(fit.eBayes$t)
[1] 37744     2
> 
> # Compute statistics necessary for Volcano Plot
> # using attenuated t statistics
> lodd <- -log10(fit.eBayes$p.value[,2])
> oo2 <- order(abs(fit.eBayes$t[,2]), decreasing = TRUE)[1:50]
> oo <- union(o1, oo2)
> ii <- intersect(o1, oo2)
> 
> # Display ii
> ii
[1] 36732 12981 36376  5534 36731 17528 11942 36377 31789
> 
> # Construct Volcano plot using attenuated t statistics
> plot(Difference[-oo], lodd[-oo], cex = .25, xlim = c(-3,3), ylim = range(lodd), xlab = 'Average (log) Fold-change', ylab = 'LOD score - Negative log10 of P-value')
> points(Difference[o1], lodd[o1], pch = 18, col = 'blue', cex = 1.5)
> points(Difference[oo2], lodd[oo2], pch = 1, col ='red',cex = 2,lwd = 2)
> abline(h = 3)
> title('Volcano Plot with moderated t statistics')
> text(-2, 3.2, 'p < 0.001')
> text(1, 4, 'Nine intersects')
> 
> # Construct the appropriate matrices/data frames for
> # construction of the heat map.
> ii.mat <- exprs.eset[ii,]
> ii.df <- data.frame(ii.mat)
> 
> # Dislplay ii.df
> ii.df
                        hr.a3.12 hr.b3.12 hr.c3.12 ts.a4.12 ts.b4.12 ts.c4.12
GmaAffx.92386.1.S1_at   7.606194 6.492420 6.848945 4.836672 4.547391 4.197930
Gma.7559.1.S1_s_at      9.234166 8.645221 8.172607 6.430099 6.729005 6.369990
GmaAffx.91805.1.S1_at   6.846458 6.456520 6.745441 4.516993 3.997653 5.142631
Gma.16735.2.S1_at       7.681348 6.926422 7.161862 5.528300 5.392914 5.087291
GmaAffx.92383.1.S1_at   8.106809 8.598193 8.657052 5.792102 6.765436 7.167904
GmaAffx.21211.1.S1_at   7.091915 6.790192 6.746073 5.014630 5.250789 4.935269
Gma.6606.1.S1_at        6.216499 5.541273 6.153452 4.464666 4.461783 4.103003
GmaAffx.91805.1.S1_s_at 5.609264 5.716167 6.090866 3.897915 4.021688 4.657927
GmaAffx.80951.1.S1_at   5.428331 6.232572 6.677944 4.590385 4.484427 4.426705
> 
> # Construct heatmap using reds and blues
> library('RColorBrewer')
> library('genefilter')
> hmcol <- colorRampPalette(brewer.pal(10, 'RdBu'))(256)
> tv.Hawaii.Resistant <- dimnames(ii.mat)[[2]][1:3]
> spcol <- ifelse(dimnames(ii.mat)[[2]] == tv.Hawaii.Resistant, 'skyblue', 'goldenrod')
> heatmap(ii.mat, col = hmcol, ColSideColors = spcol, margins = c(10,15))
> 
> # Construct heatmap using a grey (gray) scale
> library('RColorBrewer')
> library('genefilter')
> hmcol <- colorRampPalette(brewer.pal(9, 'Greys'))(256)
> tv.Hawaii.Resistant <- dimnames(ii.mat)[[2]][1:3]
> spcol <- ifelse(dimnames(ii.mat)[[2]] == tv.Hawaii.Resistant, 'grey10', 'grey80')
> heatmap(ii.mat, col = hmcol, ColSideColors = spcol, margins = c(10,15))
> 
> warnings()
Warning message:
xy2i is deprecated and will be removed in the next BioC release.
Use xy2indices in the affy package instead.
>