### Attach the affy package, which supplies ReadAffy() for importing CEL files.
library(affy)

## Read the six CEL files into a single object. The samples are listed in
## the order in which they are later renamed.
soy.ab <- ReadAffy(
  filenames = file.path(
    'geo_data',
    c('GSM209576.CEL.gz', 'GSM209585.CEL.gz', 'GSM209594.CEL.gz',
      'GSM209577.CEL.gz', 'GSM209586.CEL.gz', 'GSM209595.CEL.gz')
  ),
  ## the CEL files are gzipped, so tell ReadAffy they are compressed.
  compress = TRUE
)


## Print a summary of the freshly loaded object. The first time this runs it
## makes a network connection, so it can be slow.
soy.ab

## List the sample names currently attached to the object.
sampleNames(soy.ab)


## The current sample names refer to the original files, so replace them
## with shorter labels that are easier to remember.
new.sampleNames <- c(
  'hr.a3.12', 'hr.b3.12', 'hr.c3.12',
  'ts.a4.12', 'ts.b4.12', 'ts.c4.12'
)
sampleNames(soy.ab) <- new.sampleNames

## Confirm that the new names are in place.
sampleNames(soy.ab)


##
## Not all of the probe sets on the chip are from soy, so we build the list
## of non-soy probe sets that are to be removed.


## Read in a data frame called Species.Affy.ID, which links species names
## (column species) to affy ids (column affyID).
Species.Affy.ID <- read.table('SpeciesAffyID.txt', header = TRUE, sep = "")
dim(Species.Affy.ID)


## Load previously saved objects into the workspace.
## NOTE(review): RemoveProbes, cdfpackagename and probepackagename are used
## later in this script but never defined in it; presumably they come from
## this file -- confirm. load() also silently overwrites any existing object
## that has the same name as one stored in the file.
load( 'SoybeanCutObjects.RData' )

## TRUE for every row whose species is 'Glycine max' (NA if species is NA).
tv.for.glycine.max <- Species.Affy.ID$species == 'Glycine max'
## useNA makes rows with a missing species visible instead of dropping them
## from the count.
table( tv.for.glycine.max, useNA = 'ifany' )

## IDs of the probe sets that are positively identified as another species.
## which() skips rows whose species is NA; indexing with the raw logical
## vector would put NA entries into the list for those rows.
listOutProbeSets <- Species.Affy.ID$affyID[ which( !tv.for.glycine.max ) ]

length( listOutProbeSets )
is.factor( listOutProbeSets )

## Make listOutProbeSets a plain character vector. This converts a factor
## and leaves a vector that is already character unchanged.
listOutProbeSets <- as.character(listOutProbeSets)

## Confirm that listOutProbeSets is a character vector
is.character(listOutProbeSets)

## Print the object before the probe sets are removed, for comparison with
## the output printed after the call below.
soy.ab


## This is the bit which actually removes the probe sets we are not
## interested in.
## NOTE(review): RemoveProbes, cdfpackagename and probepackagename are not
## defined in this part of the script; presumably they come from the load()
## of SoybeanCutObjects.RData -- confirm. The return value is not assigned,
## so any effect on soy.ab must happen as a side effect of the call.
RemoveProbes(listOutProbes=NULL, listOutProbeSets, cdfpackagename, probepackagename)

## Check that the object has fewer IDs now. There should be 37444.
soy.ab

## Build the sample table destined for the phenoData slot of the AffyBatch
## object: populations 1 and 2, each with replicates 1 to 3.
pd <- data.frame(
  population = rep(c(1, 2), each = 3),
  replicate = rep(c(1, 2, 3), times = 2)
)

## Show pd with its default row names.
pd

## Use the sample names of soy.ab as the row names of pd.
rownames(pd) <- sampleNames(soy.ab)

## Show pd again; note the change in the row names.
pd

## One label per column of pd, for the varMetadata part of phenoData.
metaData <- data.frame(labelDescription = c('population', 'replicate'))

## Combine the table and its labels and store them as the new phenoData.
phenoData(soy.ab) <- new(
  'AnnotatedDataFrame',
  data = pd,
  varMetadata = metaData
)

## Display pData(soy.ab)
pData(soy.ab)

## Display phenoData(soy.ab)
phenoData(soy.ab)

## Gray-scale palette: eleven gray levels from 0 to 1 in steps of 1/10, where
## the i-th level is repeated 1, 5, 9, ..., 41 times (231 colours in total).
palette.gray <- rep(
  gray(0:10 / 10),
  times = seq(from = 1, to = 41, by = 4)
)

## Six colours from the 'Set1' ColorBrewer palette.
library(RColorBrewer)
brewer.cols <- brewer.pal(n = 6, name = 'Set1')


## This section is also currently broken, so it is left commented out...

## # Fit a probe-level model to the soy.ab probe-level data
## library('affyPLM')
## Pset1 <- fitPLM(soy.ab)

## # Display probe-level quality diagnostics for array # 1
## par(mfrow = c(2,2))
## par(oma = c(3,1,3,1))
## image(soy.ab[,1], col = palette.gray)
## image(Pset1, type = 'weights', which = 1)
## image(Pset1, type = 'resids', which = 1)
## image(Pset1, type = 'sign.resids', which = 1)
## mtext('Probe Level Models - QC Checks', side = 3, outer = T)
## par(mfrow = c(1,1))
## # Repeat above commands with .. soy.ab[,i], i = 2-6; which = i

## # Construct Relative Log Expression (RLE) Plot
## library(affyPLM)
## Mbox(Pset1, col = brewer.cols, names = NULL, main = 'Relative Log Expression Plot - H/R vs. T/S')

## # Add ylim = c(-.5, .5) after observing previous plot
## Mbox(Pset1, col = brewer.cols, ylim = c(-.5,.5),  main = 'Relative Log Expression Plot - H/R vs. T/S')


## # Construct Normalized Unscaled Standard Error (NUSE) Plot
## boxplot(Pset1, col = brewer.cols, main = 'NUSE Plot', ylab = 'NUSE - Normalized Unscaled Standard Error')

## # Add ylim = c(.9, 1.1) after observing previous plot
## boxplot(Pset1, ylim = c(.9, 1.1), col = brewer.cols, main = 'NUSE Plot', ylab = 'NUSE - Normalized Unscaled Standard Error', las = 2)