R version 2.9.2 (2009-08-24) Copyright (C) 2009 The R Foundation for Statistical Computing ISBN 3-900051-07-0 R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. - MKL libraries for acclerated math installed: see help (setMKLthreads) - ParallelR packages installed: see help (package='foreach') Type 'revo()' to visit www.revolution-computing.com for the latest REvolution R news, 'forum()' for the community forum, or 'readme()' for release notes. > png(filename="limma.r%02d.png") > palette.gray <- c(rep(gray(0:10/10), times = seq(1,41, by = 4))) > > library('RColorBrewer') > brewer.cols <- brewer.pal(6, 'Set1') > > > ### Load the appropriate library for reading affy data, and inspect the data. > library( 'affy' ) > soy.ab <- ReadAffy( 'geo_data/GSM209576.CEL.gz', + 'geo_data/GSM209585.CEL.gz', + 'geo_data/GSM209594.CEL.gz', + 'geo_data/GSM209577.CEL.gz', + 'geo_data/GSM209586.CEL.gz', + 'geo_data/GSM209595.CEL.gz', + + ## we have gz files which R can read in. + compress=TRUE) > > > > ## Inspect the loaded data. This will make a network connection first time > ## around and can be slow. > soy.ab AffyBatch object size of arrays=1164x1164 features (8 kb) cdf=Soybean (61170 affyids) number of samples=6 number of genes=61170 annotation=soybean notes= > > ## Check out the names of the samples. > sampleNames( soy.ab ) [1] "GSM209576.CEL.gz" "GSM209585.CEL.gz" "GSM209594.CEL.gz" "GSM209577.CEL.gz" [5] "GSM209586.CEL.gz" "GSM209595.CEL.gz" > > > > ## as the current sample names refer to the original files, we change this for > ## something more, er, easy to remember. > new.sampleNames <- c('hr.a3.12','hr.b3.12','hr.c3.12', + 'ts.a4.12','ts.b4.12','ts.c4.12') > sampleNames(soy.ab) <- new.sampleNames > > ## and check that it has worked > sampleNames( soy.ab ) [1] "hr.a3.12" "hr.b3.12" "hr.c3.12" "ts.a4.12" "ts.b4.12" "ts.c4.12" > > > > ## > ## We are trying to do some subsetting because not all of the probes on the > ## chip are from soy > > > ## read in another data frame called Species.Affy.ID. > ## this links species names to affy ids. > Species.Affy.ID <- read.table('SpeciesAffyID.txt', header = T, sep = "") > dim(Species.Affy.ID) [1] 61170 2 > > > > load( 'SoybeanCutObjects.RData' ) > > tv.for.glycine.max <- Species.Affy.ID$species == 'Glycine max' > table( tv.for.glycine.max ) tv.for.glycine.max FALSE TRUE 23426 37744 > listOutProbeSets <- Species.Affy.ID$affyID[ tv.for.glycine.max==FALSE ] > > length( listOutProbeSets ) [1] 23426 > is.factor( listOutProbeSets ) [1] TRUE > > ## Create a character vector for listOutProbeSets > ## One way: rename listOutProbeSets as a character vector > listOutProbeSets <- as.character(listOutProbeSets) > > ## Confirm that listOutProbeSets is a character vector > is.character(listOutProbeSets) [1] TRUE > > ## check object > soy.ab AffyBatch object size of arrays=1164x1164 features (8 kb) cdf=Soybean (61170 affyids) number of samples=6 number of genes=61170 annotation=soybean notes= > > > ## this is the bit which actually removes the stuff we are not intereste > RemoveProbes(listOutProbes=NULL, listOutProbeSets, cdfpackagename, probepackagename) > > ## Check that the object has less IDs now. There should be 37444. > soy.ab AffyBatch object size of arrays=1164x1164 features (8 kb) cdf=Soybean (37744 affyids) number of samples=6 number of genes=37744 annotation=soybean notes= > > # Start preparation for phenoData slot in AffyBatch object > pd <- data.frame(population = c(1,1,1,2,2,2), replicate = c(1,2,3,1,2,3)) > > # Display contents of pd > pd population replicate 1 1 1 2 1 2 3 1 3 4 2 1 5 2 2 6 2 3 > > # Assign the sampleNames(soy.ab) to the rownames of pd > rownames(pd) <- sampleNames(soy.ab) > > # Display contents of pd again, notice change in rownames > pd population replicate hr.a3.12 1 1 hr.b3.12 1 2 hr.c3.12 1 3 ts.a4.12 2 1 ts.b4.12 2 2 ts.c4.12 2 3 > > ## Continue preparation for phenoData slot > metaData <- data.frame(labelDescription = c( 'population', 'replicate' )) > > ## Establish new phenoData slot > phenoData(soy.ab) <- new( 'AnnotatedDataFrame', data = pd, varMetadata = metaData) > > ## Display pData(soy.ab) > pData(soy.ab) population replicate hr.a3.12 1 1 hr.b3.12 1 2 hr.c3.12 1 3 ts.a4.12 2 1 ts.b4.12 2 2 ts.c4.12 2 3 > > ## Display phenoData(soy.ab) > phenoData(soy.ab) An object of class "AnnotatedDataFrame" rowNames: hr.a3.12, hr.b3.12, ..., ts.c4.12 (6 total) varLabels and varMetadata description: population: population replicate: replicate > > # Execute RMA procedure on soy.ab object; assign to object eset > eset <- rma(soy.ab) Background correcting Normalizing Calculating Expression > > # Display eset > eset ExpressionSet (storageMode: lockedEnvironment) assayData: 37744 features, 6 samples element names: exprs phenoData sampleNames: hr.a3.12, hr.b3.12, ..., ts.c4.12 (6 total) varLabels and varMetadata description: population: population replicate: replicate featureData featureNames: AFFX-BioB-3_at, AFFX-BioB-5_at, ..., soybean_rRNA_918_RC_at (37744 total) fvarLabels and fvarMetadata description: none experimentData: use 'experimentData(object)' Annotation: soybean > > # Create an object that contains exprs(eset), named exprs.eset > exprs.eset <- exprs(eset) > > # Create index values – Index1 for Hawaii/Resistant, > # Index2 for Taiwan/Susceptible > Index1 <- 1:3 > Index2 <- 4:6 > > # Compute Difference vector for rowMeans between H/R and T/S > Difference <- rowMeans(exprs.eset[,Index1]) -rowMeans(exprs.eset[,Index2]) > > # Also compute the Average for each row > Average <- rowMeans(exprs.eset) > > # Create data frame for matrix exprs.eset > exprs.eset.df <- data.frame(exprs.eset) > > > # Construct expression set boxplots following RMA > par(oma = c(1,1,3,1)) > boxplot(exprs.eset.df, col = brewer.cols) > mtext('Boxplots Soybean(Glycine max subset) RMA Gene Expression Data', side = 3, outer = T) > > # Construct MA-Plot, Difference vs. Average > plot(Average, Difference) > lines(lowess(Average, Difference), col = 'red', lwd = 4) > abline( h = -2) > abline( h = 2) > title(sub = "> lines(lowess(Average, Difference), col = 'red', lwd = 4)") > mtext('MA-Plot, Difference vs. Average, Soybean (H/R & T/S)', outer = T, side = 3) > > > > # Compute ordinary t statistics > library('genefilter') > tt <- rowttests(exprs.eset, factor(eset$population)) > names(tt) [1] "statistic" "dm" "p.value" > > # Check that length of tt$statistic is 37744 > length(tt$statistic) [1] 37744 > > > ## Start the construction of the Volcano Plot > ## Construct the lod scores... > lod <- -log10(tt$p.value) > o1 <- order(abs(Difference), decreasing = TRUE)[1:50] > o2 <- order(abs(tt$statistic), decreasing = TRUE)[1:50] > > # Construct union and intersection of sets > o <- union(o1, o2) > i <- intersect(o1, o2) > > # Display i; note, we have 4 intersects > i [1] 12981 5534 17528 11942 > > library(limma) > > # Construct population.groups for model design > population.groups <- factor(c(rep('Taiwan/Susceptible',3), rep('Hawaii/Resistant',3))) > design <- model.matrix( ~ population.groups ) > > # Display design > design (Intercept) population.groupsTaiwan/Susceptible 1 1 1 2 1 1 3 1 1 4 1 0 5 1 0 6 1 0 attr(,"assign") [1] 0 1 attr(,"contrasts") attr(,"contrasts")$population.groups [1] "contr.treatment" > > # Fit linear model > fit <- lmFit(eset, design) > > # Check the dimension of fit$coeff; should be 37744 x 2 > dim(fit$coeff) [1] 37744 2 > > # Execute eBayes to obtain attenuated t statistics > fit.eBayes <- eBayes(fit) > > # Check dimension of fit.eBayes$t; should be 37744 x 2 > dim(fit.eBayes$t) [1] 37744 2 > > # Compute statistics necessary for Volcano Plot > # using attenuated t statistics > lodd <- -log10(fit.eBayes$p.value[,2]) > oo2 <- order(abs(fit.eBayes$t[,2]), decreasing = TRUE)[1:50] > oo <- union(o1, oo2) > ii <- intersect(o1, oo2) > > # Display ii > ii [1] 36732 12981 36376 5534 36731 17528 11942 36377 31789 > > # Construct Volcano plot using attenuated t statistics > plot(Difference[-oo], lodd[-oo], cex = .25, xlim = c(-3,3), ylim = range(lodd), xlab = 'Average (log) Fold-change', ylab = 'LOD score - Negative log10 of P-value') > points(Difference[o1], lodd[o1], pch = 18, col = 'blue', cex = 1.5) > points(Difference[oo2], lodd[oo2], pch = 1, col ='red',cex = 2,lwd = 2) > abline(h = 3) > title('Volcano Plot with moderated t statistics') > text(-2, 3.2, 'p < 0.001') > text(1, 4, 'Nine intersects') > > warnings() Warning message: xy2i is deprecated and will be removed in the next BioC release. Use xy2indices in the affy package instead. >