### Load the appropriate library for reading affy data, and inspect the data.
library( 'affy' )
## Read the six CEL files into a single object, soy.ab. The paths are handed
## over as one character vector through the `filenames` argument.
soy.ab <- ReadAffy(
  filenames = c(
    'geo_data/GSM209576.CEL.gz',
    'geo_data/GSM209585.CEL.gz',
    'geo_data/GSM209594.CEL.gz',
    'geo_data/GSM209577.CEL.gz',
    'geo_data/GSM209586.CEL.gz',
    'geo_data/GSM209595.CEL.gz'
  ),
  ## the inputs are .gz files, so tell ReadAffy they are compressed
  compress = TRUE
)


## Print a summary of the loaded data. The first time this runs it opens a
## network connection, so it can be slow.
soy.ab

## Show the sample names as they stand right after loading.
sampleNames(soy.ab)


## The current sample names refer to the original files, so swap them for
## labels that are easier to remember. One label per sample, given in the
## same order as the samples already stored in soy.ab.
new.sampleNames <- c(
  'hr.a3.12', 'hr.b3.12', 'hr.c3.12',
  'ts.a4.12', 'ts.b4.12', 'ts.c4.12'
)
sampleNames(soy.ab) <- new.sampleNames

## Print the names again to confirm the replacement took effect.
sampleNames(soy.ab)


##
## Subset the probe sets: not all of the probes on the chip are from soy, so
## the steps below identify the non-soy probe sets for removal.


## Read a table called Species.Affy.ID that links species names (column
## `species`) to affy ids (column `affyID`), then report its dimensions.
## `header = TRUE` is spelled out: `T` is an ordinary variable that can be
## reassigned, whereas `TRUE` is a reserved word.
Species.Affy.ID <- read.table('SpeciesAffyID.txt', header = TRUE, sep = "")
dim(Species.Affy.ID)


## Load previously saved R objects into the workspace.
## NOTE(review): presumably this supplies RemoveProbes, cdfpackagename and
## probepackagename, which are used further down but not defined in the
## visible part of this script -- confirm. load() overwrites any existing
## objects that share a name with the saved ones.
load('SoybeanCutObjects.RData')

## Logical vector: TRUE for rows whose species is exactly 'Glycine max'.
tv.for.glycine.max <- Species.Affy.ID$species == 'Glycine max'
table(tv.for.glycine.max)

## Collect the affy ids of every row that is NOT 'Glycine max'; these are the
## probe sets to be removed later.
listOutProbeSets <- Species.Affy.ID$affyID[!tv.for.glycine.max]

length(listOutProbeSets)
is.factor(listOutProbeSets)

## Make sure listOutProbeSets is a character vector (it may have been read in
## as a factor).
listOutProbeSets <- as.character(listOutProbeSets)

## Confirm that listOutProbeSets is a character vector
is.character(listOutProbeSets)

## Print the object's summary before filtering, so it can be compared with the
## summary printed after the RemoveProbes call below.
soy.ab


## This is the step that actually removes the probe sets we are not interested
## in. The first argument (listOutProbes) is NULL; listOutProbeSets holds the
## non-'Glycine max' affy ids built above.
## RemoveProbes, cdfpackagename and probepackagename are not defined in the
## visible part of this script.
## NOTE(review): presumably they come from SoybeanCutObjects.RData -- confirm.
## NOTE(review): the return value is not assigned, so any change seen in soy.ab
## below must happen as a side effect of RemoveProbes -- verify against its
## definition.
RemoveProbes(listOutProbes=NULL, listOutProbeSets, cdfpackagename, probepackagename)

## Check that the object has fewer IDs now. There should be 37444.
soy.ab

## Build the sample annotation table that will go into the phenoData slot of
## soy.ab: six rows, with population 1,1,1,2,2,2 and replicate 1,2,3,1,2,3.
pd <- data.frame(
  population = rep(c(1, 2), each = 3),
  replicate = rep(c(1, 2, 3), times = 2)
)

## Show pd with its default row names.
pd

## Label the rows of pd with the sample names of soy.ab.
rownames(pd) <- sampleNames(soy.ab)

## Show pd again; the row names are now the sample names.
pd

## One description per column of pd, in column order.
metaData <- data.frame(labelDescription = c('population', 'replicate'))

## Wrap pd and its column descriptions in an AnnotatedDataFrame and install it
## as the phenoData of soy.ab.
phenoData(soy.ab) <- new(
  'AnnotatedDataFrame',
  data = pd,
  varMetadata = metaData
)

## Print the sample annotation table stored in soy.ab.
pData(soy.ab)

## Print the phenoData object itself.
phenoData(soy.ab)


## Construct RNA degradation plots and summary
library('affy')
## Use a single plotting panel (1 row x 1 column) for the plot below.
par(mfrow = c(1, 1))

## Compute the RNA degradation object for the arrays in soy.ab.
RNAdeg <- AffyRNAdeg(soy.ab)

## Draw the first three arrays in blue and the last three in red. The colour
## argument of plotAffyRNAdeg is named `cols`; it is spelled out in full so
## the call does not rely on partial matching of `col`.
plotAffyRNAdeg(RNAdeg, cols = c(rep('blue', 3), rep('red', 3)))

## Print the summary for the RNA degradation object.
summaryAffyRNAdeg(RNAdeg)