R version 2.9.2 (2009-08-24) Copyright (C) 2009 The R Foundation for Statistical Computing ISBN 3-900051-07-0 R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. - MKL libraries for acclerated math installed: see help (setMKLthreads) - ParallelR packages installed: see help (package='foreach') Type 'revo()' to visit www.revolution-computing.com for the latest REvolution R news, 'forum()' for the community forum, or 'readme()' for release notes. > png(filename="heatmaps.r%02d.png") > palette.gray <- c(rep(gray(0:10/10), times = seq(1,41, by = 4))) > > library('RColorBrewer') > brewer.cols <- brewer.pal(6, 'Set1') > > > ### Load the appropriate library for reading affy data, and inspect the data. > library( 'affy' ) > soy.ab <- ReadAffy( 'geo_data/GSM209576.CEL.gz', + 'geo_data/GSM209585.CEL.gz', + 'geo_data/GSM209594.CEL.gz', + 'geo_data/GSM209577.CEL.gz', + 'geo_data/GSM209586.CEL.gz', + 'geo_data/GSM209595.CEL.gz', + + ## we have gz files which R can read in. + compress=TRUE) > > > > ## Inspect the loaded data. This will make a network connection first time > ## around and can be slow. > soy.ab AffyBatch object size of arrays=1164x1164 features (8 kb) cdf=Soybean (61170 affyids) number of samples=6 number of genes=61170 annotation=soybean notes= > > ## Check out the names of the samples. > sampleNames( soy.ab ) [1] "GSM209576.CEL.gz" "GSM209585.CEL.gz" "GSM209594.CEL.gz" "GSM209577.CEL.gz" [5] "GSM209586.CEL.gz" "GSM209595.CEL.gz" > > > > ## as the current sample names refer to the original files, we change this for > ## something more, er, easy to remember. > new.sampleNames <- c('hr.a3.12','hr.b3.12','hr.c3.12', + 'ts.a4.12','ts.b4.12','ts.c4.12') > sampleNames(soy.ab) <- new.sampleNames > > ## and check that it has worked > sampleNames( soy.ab ) [1] "hr.a3.12" "hr.b3.12" "hr.c3.12" "ts.a4.12" "ts.b4.12" "ts.c4.12" > > > > ## > ## We are trying to do some subsetting because not all of the probes on the > ## chip are from soy > > > ## read in another data frame called Species.Affy.ID. > ## this links species names to affy ids. > Species.Affy.ID <- read.table('SpeciesAffyID.txt', header = T, sep = "") > dim(Species.Affy.ID) [1] 61170 2 > > > > load( 'SoybeanCutObjects.RData' ) > > tv.for.glycine.max <- Species.Affy.ID$species == 'Glycine max' > table( tv.for.glycine.max ) tv.for.glycine.max FALSE TRUE 23426 37744 > listOutProbeSets <- Species.Affy.ID$affyID[ tv.for.glycine.max==FALSE ] > > length( listOutProbeSets ) [1] 23426 > is.factor( listOutProbeSets ) [1] TRUE > > ## Create a character vector for listOutProbeSets > ## One way: rename listOutProbeSets as a character vector > listOutProbeSets <- as.character(listOutProbeSets) > > ## Confirm that listOutProbeSets is a character vector > is.character(listOutProbeSets) [1] TRUE > > ## check object > soy.ab AffyBatch object size of arrays=1164x1164 features (8 kb) cdf=Soybean (61170 affyids) number of samples=6 number of genes=61170 annotation=soybean notes= > > > ## this is the bit which actually removes the stuff we are not intereste > RemoveProbes(listOutProbes=NULL, listOutProbeSets, cdfpackagename, probepackagename) > > ## Check that the object has less IDs now. There should be 37444. > soy.ab AffyBatch object size of arrays=1164x1164 features (8 kb) cdf=Soybean (37744 affyids) number of samples=6 number of genes=37744 annotation=soybean notes= > > # Start preparation for phenoData slot in AffyBatch object > pd <- data.frame(population = c(1,1,1,2,2,2), replicate = c(1,2,3,1,2,3)) > > # Display contents of pd > pd population replicate 1 1 1 2 1 2 3 1 3 4 2 1 5 2 2 6 2 3 > > # Assign the sampleNames(soy.ab) to the rownames of pd > rownames(pd) <- sampleNames(soy.ab) > > # Display contents of pd again, notice change in rownames > pd population replicate hr.a3.12 1 1 hr.b3.12 1 2 hr.c3.12 1 3 ts.a4.12 2 1 ts.b4.12 2 2 ts.c4.12 2 3 > > ## Continue preparation for phenoData slot > metaData <- data.frame(labelDescription = c( 'population', 'replicate' )) > > ## Establish new phenoData slot > phenoData(soy.ab) <- new( 'AnnotatedDataFrame', data = pd, varMetadata = metaData) > > ## Display pData(soy.ab) > pData(soy.ab) population replicate hr.a3.12 1 1 hr.b3.12 1 2 hr.c3.12 1 3 ts.a4.12 2 1 ts.b4.12 2 2 ts.c4.12 2 3 > > ## Display phenoData(soy.ab) > phenoData(soy.ab) An object of class "AnnotatedDataFrame" rowNames: hr.a3.12, hr.b3.12, ..., ts.c4.12 (6 total) varLabels and varMetadata description: population: population replicate: replicate > > # Execute RMA procedure on soy.ab object; assign to object eset > eset <- rma(soy.ab) Background correcting Normalizing Calculating Expression > > # Display eset > eset ExpressionSet (storageMode: lockedEnvironment) assayData: 37744 features, 6 samples element names: exprs phenoData sampleNames: hr.a3.12, hr.b3.12, ..., ts.c4.12 (6 total) varLabels and varMetadata description: population: population replicate: replicate featureData featureNames: AFFX-BioB-3_at, AFFX-BioB-5_at, ..., soybean_rRNA_918_RC_at (37744 total) fvarLabels and fvarMetadata description: none experimentData: use 'experimentData(object)' Annotation: soybean > > # Create an object that contains exprs(eset), named exprs.eset > exprs.eset <- exprs(eset) > > # Create index values – Index1 for Hawaii/Resistant, > # Index2 for Taiwan/Susceptible > Index1 <- 1:3 > Index2 <- 4:6 > > # Compute Difference vector for rowMeans between H/R and T/S > Difference <- rowMeans(exprs.eset[,Index1]) -rowMeans(exprs.eset[,Index2]) > > # Also compute the Average for each row > Average <- rowMeans(exprs.eset) > > # Create data frame for matrix exprs.eset > exprs.eset.df <- data.frame(exprs.eset) > > > # Construct expression set boxplots following RMA > par(oma = c(1,1,3,1)) > boxplot(exprs.eset.df, col = brewer.cols) > mtext('Boxplots Soybean(Glycine max subset) RMA Gene Expression Data', side = 3, outer = T) > > # Construct MA-Plot, Difference vs. Average > plot(Average, Difference) > lines(lowess(Average, Difference), col = 'red', lwd = 4) > abline( h = -2) > abline( h = 2) > title(sub = "> lines(lowess(Average, Difference), col = 'red', lwd = 4)") > mtext('MA-Plot, Difference vs. Average, Soybean (H/R & T/S)', outer = T, side = 3) > > > > # Compute ordinary t statistics > library('genefilter') > tt <- rowttests(exprs.eset, factor(eset$population)) > names(tt) [1] "statistic" "dm" "p.value" > > # Check that length of tt$statistic is 37744 > length(tt$statistic) [1] 37744 > > > ## Start the construction of the Volcano Plot > ## Construct the lod scores... > lod <- -log10(tt$p.value) > o1 <- order(abs(Difference), decreasing = TRUE)[1:50] > o2 <- order(abs(tt$statistic), decreasing = TRUE)[1:50] > > # Construct union and intersection of sets > o <- union(o1, o2) > i <- intersect(o1, o2) > > # Display i; note, we have 4 intersects > i [1] 12981 5534 17528 11942 > > library(limma) > > # Construct population.groups for model design > population.groups <- factor(c(rep('Taiwan/Susceptible',3), rep('Hawaii/Resistant',3))) > design <- model.matrix( ~ population.groups ) > > # Display design > design (Intercept) population.groupsTaiwan/Susceptible 1 1 1 2 1 1 3 1 1 4 1 0 5 1 0 6 1 0 attr(,"assign") [1] 0 1 attr(,"contrasts") attr(,"contrasts")$population.groups [1] "contr.treatment" > > # Fit linear model > fit <- lmFit(eset, design) > > # Check the dimension of fit$coeff; should be 37744 x 2 > dim(fit$coeff) [1] 37744 2 > > # Execute eBayes to obtain attenuated t statistics > fit.eBayes <- eBayes(fit) > > # Check dimension of fit.eBayes$t; should be 37744 x 2 > dim(fit.eBayes$t) [1] 37744 2 > > # Compute statistics necessary for Volcano Plot > # using attenuated t statistics > lodd <- -log10(fit.eBayes$p.value[,2]) > oo2 <- order(abs(fit.eBayes$t[,2]), decreasing = TRUE)[1:50] > oo <- union(o1, oo2) > ii <- intersect(o1, oo2) > > # Display ii > ii [1] 36732 12981 36376 5534 36731 17528 11942 36377 31789 > > # Construct Volcano plot using attenuated t statistics > plot(Difference[-oo], lodd[-oo], cex = .25, xlim = c(-3,3), ylim = range(lodd), xlab = 'Average (log) Fold-change', ylab = 'LOD score - Negative log10 of P-value') > points(Difference[o1], lodd[o1], pch = 18, col = 'blue', cex = 1.5) > points(Difference[oo2], lodd[oo2], pch = 1, col ='red',cex = 2,lwd = 2) > abline(h = 3) > title('Volcano Plot with moderated t statistics') > text(-2, 3.2, 'p < 0.001') > text(1, 4, 'Nine intersects') > > # Construct the appropriate matrices/data frames for > # construction of the heat map. > ii.mat <- exprs.eset[ii,] > ii.df <- data.frame(ii.mat) > > # Dislplay ii.df > ii.df hr.a3.12 hr.b3.12 hr.c3.12 ts.a4.12 ts.b4.12 ts.c4.12 GmaAffx.92386.1.S1_at 7.606194 6.492420 6.848945 4.836672 4.547391 4.197930 Gma.7559.1.S1_s_at 9.234166 8.645221 8.172607 6.430099 6.729005 6.369990 GmaAffx.91805.1.S1_at 6.846458 6.456520 6.745441 4.516993 3.997653 5.142631 Gma.16735.2.S1_at 7.681348 6.926422 7.161862 5.528300 5.392914 5.087291 GmaAffx.92383.1.S1_at 8.106809 8.598193 8.657052 5.792102 6.765436 7.167904 GmaAffx.21211.1.S1_at 7.091915 6.790192 6.746073 5.014630 5.250789 4.935269 Gma.6606.1.S1_at 6.216499 5.541273 6.153452 4.464666 4.461783 4.103003 GmaAffx.91805.1.S1_s_at 5.609264 5.716167 6.090866 3.897915 4.021688 4.657927 GmaAffx.80951.1.S1_at 5.428331 6.232572 6.677944 4.590385 4.484427 4.426705 > > # Construct heatmap using reds and blues > library('RColorBrewer') > library('genefilter') > hmcol <- colorRampPalette(brewer.pal(10, 'RdBu'))(256) > tv.Hawaii.Resistant <- dimnames(ii.mat)[[2]][1:3] > spcol <- ifelse(dimnames(ii.mat)[[2]] == tv.Hawaii.Resistant, 'skyblue', 'goldenrod') > heatmap(ii.mat, col = hmcol, ColSideColors = spcol, margins = c(10,15)) > > # Construct heatmap using a grey (gray) scale > library('RColorBrewer') > library('genefilter') > hmcol <- colorRampPalette(brewer.pal(9, 'Greys'))(256) > tv.Hawaii.Resistant <- dimnames(ii.mat)[[2]][1:3] > spcol <- ifelse(dimnames(ii.mat)[[2]] == tv.Hawaii.Resistant, 'grey10', 'grey80') > heatmap(ii.mat, col = hmcol, ColSideColors = spcol, margins = c(10,15)) > > warnings() Warning message: xy2i is deprecated and will be removed in the next BioC release. Use xy2indices in the affy package instead. >