### Attach the affy package, then read the six CEL files into a single object
### called soy.ab.
library("affy")

## The CEL files are gzip-compressed (.gz), so compress = TRUE is set to let
## them be read in directly.
soy.ab <- ReadAffy(
  filenames = c(
    "geo_data/GSM209576.CEL.gz",
    "geo_data/GSM209585.CEL.gz",
    "geo_data/GSM209594.CEL.gz",
    "geo_data/GSM209577.CEL.gz",
    "geo_data/GSM209586.CEL.gz",
    "geo_data/GSM209595.CEL.gz"
  ),
  compress = TRUE
)


## Print a summary of the loaded data. The first time this runs it will make
## a network connection, so it can be slow.
soy.ab

## List the sample names currently attached to the object.
sampleNames(soy.ab)


## The current sample names refer to the original files, so replace them with
## shorter labels that are easier to remember.
new.sampleNames <- c(
  "hr.a3.12", "hr.b3.12", "hr.c3.12",
  "ts.a4.12", "ts.b4.12", "ts.c4.12"
)
sampleNames(soy.ab) <- new.sampleNames

## Confirm that the new names have been applied.
sampleNames(soy.ab)


##
## Not all of the probe sets on the chip are from soy, so work out which ones
## should be removed.


## Read in a data frame called Species.Affy.ID, which links species names to
## affy IDs. TRUE is spelled out because T is an ordinary variable that can be
## reassigned.
Species.Affy.ID <- read.table("SpeciesAffyID.txt", header = TRUE, sep = "")
dim(Species.Affy.ID)


## NOTE(review): load() restores every object saved in the file into the
## workspace and silently replaces any existing object of the same name --
## confirm that it does not overwrite anything created earlier in this script.
load("SoybeanCutObjects.RData")

## TRUE for the rows whose species is 'Glycine max' (soy).
tv.for.glycine.max <- Species.Affy.ID$species == "Glycine max"
table(tv.for.glycine.max)

## Affy IDs of the probe sets that are NOT from soy. which() keeps only the
## positions that are definitely TRUE, so a missing (NA) species cannot put an
## NA entry into the removal list.
listOutProbeSets <- Species.Affy.ID$affyID[which(!tv.for.glycine.max)]

length(listOutProbeSets)
is.factor(listOutProbeSets)

## Make sure listOutProbeSets is a character vector (it may have been read in
## as a factor).
listOutProbeSets <- as.character(listOutProbeSets)

## Confirm that listOutProbeSets is now a character vector.
is.character(listOutProbeSets)

## Print the object before the removal, for comparison with the output below.
soy.ab


## This is the step which actually removes the probe sets we are not
## interested in (the IDs collected in listOutProbeSets).
## NOTE(review): RemoveProbes, cdfpackagename and probepackagename are not
## defined in this part of the script; presumably they are restored by the
## load() of 'SoybeanCutObjects.RData' -- confirm.
## NOTE(review): the return value is not assigned, so the call presumably
## works by side effect -- confirm against the definition of RemoveProbes.
RemoveProbes(listOutProbes=NULL, listOutProbeSets, cdfpackagename, probepackagename)

## Check that the object has fewer IDs now. There should be 37444.
soy.ab