R version 2.9.2 (2009-08-24) Copyright (C) 2009 The R Foundation for Statistical Computing ISBN 3-900051-07-0 R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. - MKL libraries for acclerated math installed: see help (setMKLthreads) - ParallelR packages installed: see help (package='foreach') Type 'revo()' to visit www.revolution-computing.com for the latest REvolution R news, 'forum()' for the community forum, or 'readme()' for release notes. > png(filename="affy_quality_assement.r%02d.png") > > ### Load the appropriate library for reading affy data, and inspect the data. > library( 'affy' ) > soy.ab <- ReadAffy( 'geo_data/GSM209576.CEL.gz', + 'geo_data/GSM209585.CEL.gz', + 'geo_data/GSM209594.CEL.gz', + 'geo_data/GSM209577.CEL.gz', + 'geo_data/GSM209586.CEL.gz', + 'geo_data/GSM209595.CEL.gz', + + ## we have gz files which R can read in. + compress=TRUE) > > > > ## Inspect the loaded data. This will make a network connection first time > ## around and can be slow. > soy.ab AffyBatch object size of arrays=1164x1164 features (8 kb) cdf=Soybean (61170 affyids) number of samples=6 number of genes=61170 annotation=soybean notes= > > ## Check out the names of the samples. > sampleNames( soy.ab ) [1] "GSM209576.CEL.gz" "GSM209585.CEL.gz" "GSM209594.CEL.gz" "GSM209577.CEL.gz" [5] "GSM209586.CEL.gz" "GSM209595.CEL.gz" > > > > ## as the current sample names refer to the original files, we change this for > ## something more, er, easy to remember. > new.sampleNames <- c('hr.a3.12','hr.b3.12','hr.c3.12', + 'ts.a4.12','ts.b4.12','ts.c4.12') > sampleNames(soy.ab) <- new.sampleNames > > ## and check that it has worked > sampleNames( soy.ab ) [1] "hr.a3.12" "hr.b3.12" "hr.c3.12" "ts.a4.12" "ts.b4.12" "ts.c4.12" > > > > ## > ## We are trying to do some subsetting because not all of the probes on the > ## chip are from soy > > > ## read in another data frame called Species.Affy.ID. > ## this links species names to affy ids. > Species.Affy.ID <- read.table('SpeciesAffyID.txt', header = T, sep = "") > dim(Species.Affy.ID) [1] 61170 2 > > > > load( 'SoybeanCutObjects.RData' ) > > tv.for.glycine.max <- Species.Affy.ID$species == 'Glycine max' > table( tv.for.glycine.max ) tv.for.glycine.max FALSE TRUE 23426 37744 > listOutProbeSets <- Species.Affy.ID$affyID[ tv.for.glycine.max==FALSE ] > > length( listOutProbeSets ) [1] 23426 > is.factor( listOutProbeSets ) [1] TRUE > > ## Create a character vector for listOutProbeSets > ## One way: rename listOutProbeSets as a character vector > listOutProbeSets <- as.character(listOutProbeSets) > > ## Confirm that listOutProbeSets is a character vector > is.character(listOutProbeSets) [1] TRUE > > ## check object > soy.ab AffyBatch object size of arrays=1164x1164 features (8 kb) cdf=Soybean (61170 affyids) number of samples=6 number of genes=61170 annotation=soybean notes= > > > ## this is the bit which actually removes the stuff we are not intereste > RemoveProbes(listOutProbes=NULL, listOutProbeSets, cdfpackagename, probepackagename) > > ## Check that the object has less IDs now. There should be 37444. > soy.ab AffyBatch object size of arrays=1164x1164 features (8 kb) cdf=Soybean (37744 affyids) number of samples=6 number of genes=37744 annotation=soybean notes= > > # Start preparation for phenoData slot in AffyBatch object > pd <- data.frame(population = c(1,1,1,2,2,2), replicate = c(1,2,3,1,2,3)) > > # Display contents of pd > pd population replicate 1 1 1 2 1 2 3 1 3 4 2 1 5 2 2 6 2 3 > > # Assign the sampleNames(soy.ab) to the rownames of pd > rownames(pd) <- sampleNames(soy.ab) > > # Display contents of pd again, notice change in rownames > pd population replicate hr.a3.12 1 1 hr.b3.12 1 2 hr.c3.12 1 3 ts.a4.12 2 1 ts.b4.12 2 2 ts.c4.12 2 3 > > ## Continue preparation for phenoData slot > metaData <- data.frame(labelDescription = c( 'population', 'replicate' )) > > ## Establish new phenoData slot > phenoData(soy.ab) <- new( 'AnnotatedDataFrame', data = pd, varMetadata = metaData) > > ## Display pData(soy.ab) > pData(soy.ab) population replicate hr.a3.12 1 1 hr.b3.12 1 2 hr.c3.12 1 3 ts.a4.12 2 1 ts.b4.12 2 2 ts.c4.12 2 3 > > ## Display phenoData(soy.ab) > phenoData(soy.ab) An object of class "AnnotatedDataFrame" rowNames: hr.a3.12, hr.b3.12, ..., ts.c4.12 (6 total) varLabels and varMetadata description: population: population replicate: replicate > > library( 'simpleaffy' ) > > ## TODO this section is broken > > ## # Load (source in) function for quality control metrics revised > ## # by W. Gregory Alvord and Octavio Quinones to work# with > ## # soybeancdf > source('soybean.mod.qc.affy.r') > > ## # Execute soybean.mod.qc.affy() on object soy.ab > ##soy.ab.qc <- soybean.mod.qc.affy(soy.ab) > > ## # Assign qc metrics to objects > ## avbg.soy.ab.qc <- avbg(soy.ab.qc) > ## minbg.soy.ab.qc <- minbg(soy.ab.qc) > ## maxbg.soy.ab.qc <- maxbg(soy.ab.qc) > ## sfs.soy.ab.qc <- sfs(soy.ab.qc) > ## percent.present.soy.ab.qc <- percent.present(soy.ab.qc) > > ## # Display objects (here, rounded to 2 decimal places) > ## round(minbg.soy.ab.qc ,2) > ## round(maxbg.soy.ab.qc ,2) > ## round(avbg.soy.ab.qc ,2) > ## round(sfs.soy.ab.qc ,2) > ## round(percent.present.soy.ab.qc,2) > > > warnings() Warning message: xy2i is deprecated and will be removed in the next BioC release. Use xy2indices in the affy package instead. >