```# Jeff Kinne's version of the R activity on May 29 as part of Ewy Mathe's workshop
#
# The setup - we imagine that we have two groups of individuals and measure
#             metabolites in all of them, and apply t test's to see which
#             metabolites have a "statistically significant" difference between
#             the two groups.  Let's see...

# two groups of this size
groupSize <- 50
mygroup <- c(rep("Grp1", groupSize), rep("Grp2",groupSize))

# and this many metabolites
n_metab <- 100

# we generate fake random values for each metabolite for each individual
# and let them each range from 1 to 25000
mydata <- sample(1:25000, n_metab * (2*groupSize), replace=TRUE)
mymat <- matrix(mydata, nrow=n_metab, ncol=(groupSize*2)) # and put it into a matrix

# do a t test for each metabolite (each row) and save the p values
myp <- c()
for (i in 1:n_metab) {
onemetab <- mymat[i,] #i'th row, i'th metabolite
# t test for the two groups
result <- t.test(onemetab[mygroup == "Grp1"], onemetab[mygroup == "Grp2"])
myp <- c(myp, result\$p.value) #and save the p value
}

# instead of using the for loop, could also do it with apply
myp <- sapply(1:n_metab, function(i) {
onemetab <- mymat[i,]
result <- t.test(onemetab[mygroup == "Grp1"], onemetab[mygroup == "Grp2"])
result\$p.value

})

# plot the histogram.  Should look "kind of" flat if the two
# groups are not really different.  If there is really a difference
# between the two groups expect to see a peak near p=0.
hist(myp)

# and check with adjusted p values (correction for doing multiple comparisons)

# plot both the myp and myp.adjusted together
par(mfrow=c(1,2))
hist(myp, ylim=c(0,20))

# And do again but now make some of the group2 metabolites different so
# we should see those as significant
sigRows <- 1:10
mymat.sig <- mymat
mymat.sig[sigRows,mygroup == "Grp2"] <- 10 * mymat[sigRows,mygroup == "Grp2"]
myp.sig <- sapply(1:n_metab, function(i) {
onemetab <- mymat.sig[i,]
result <- t.test(onemetab[mygroup == "Grp1"], onemetab[mygroup == "Grp2"])
result\$p.value

})
hist(myp.sig, ylim=c(0,20))