# Jeff Kinne's version of the R activity on May 29 as part of Ewy Mathe's workshop
#
# The setup - we imagine that we have two groups of individuals and measure
# metabolites in all of them, and apply t test's to see which
# metabolites have a "statistically significant" difference between
# the two groups. Let's see...
# two groups of this size
groupSize <- 50
mygroup <- c(rep("Grp1", groupSize), rep("Grp2",groupSize))
# and this many metabolites
n_metab <- 100
# we generate fake random values for each metabolite for each individual
# and let them each range from 1 to 25000
mydata <- sample(1:25000, n_metab * (2*groupSize), replace=TRUE)
mymat <- matrix(mydata, nrow=n_metab, ncol=(groupSize*2)) # and put it into a matrix
# do a t test for each metabolite (each row) and save the p values
myp <- c()
for (i in 1:n_metab) {
onemetab <- mymat[i,] #i'th row, i'th metabolite
# t test for the two groups
result <- t.test(onemetab[mygroup == "Grp1"], onemetab[mygroup == "Grp2"])
myp <- c(myp, result$p.value) #and save the p value
}
# instead of using the for loop, could also do it with apply
myp <- sapply(1:n_metab, function(i) {
onemetab <- mymat[i,]
result <- t.test(onemetab[mygroup == "Grp1"], onemetab[mygroup == "Grp2"])
result$p.value
})
# plot the histogram. Should look "kind of" flat if the two
# groups are not really different. If there is really a difference
# between the two groups expect to see a peak near p=0.
hist(myp)
# and check with adjusted p values (correction for doing multiple comparisons)
myp.adjusted <- p.adjust(myp, method="fdr")
hist(myp.adjusted)
# plot both the myp and myp.adjusted together
par(mfrow=c(1,2))
hist(myp, ylim=c(0,20))
hist(myp.adjusted, ylim=c(0,20))
# And do again but now make some of the group2 metabolites different so
# we should see those as significant
sigRows <- 1:10
mymat.sig <- mymat
mymat.sig[sigRows,mygroup == "Grp2"] <- 10 * mymat[sigRows,mygroup == "Grp2"]
myp.sig <- sapply(1:n_metab, function(i) {
onemetab <- mymat.sig[i,]
result <- t.test(onemetab[mygroup == "Grp1"], onemetab[mygroup == "Grp2"])
result$p.value
})
hist(myp.sig, ylim=c(0,20))
myp.sig.adjusted <- p.adjust(myp.sig, method="fdr")
hist(myp.sig.adjusted, ylim=c(0,20))
which(myp.sig.adjusted < 0.05)
(t-162.1152)/162.1152 * 1e6 = 10 * 162.1152 / 1e6