# Jeff Kinne's version of the R activity on May 29 as part of Ewy Mathe's workshop # # The setup - we imagine that we have two groups of individuals and measure # metabolites in all of them, and apply t test's to see which # metabolites have a "statistically significant" difference between # the two groups. Let's see... # two groups of this size groupSize <- 50 mygroup <- c(rep("Grp1", groupSize), rep("Grp2",groupSize)) # and this many metabolites n_metab <- 100 # we generate fake random values for each metabolite for each individual # and let them each range from 1 to 25000 mydata <- sample(1:25000, n_metab * (2*groupSize), replace=TRUE) mymat <- matrix(mydata, nrow=n_metab, ncol=(groupSize*2)) # and put it into a matrix # do a t test for each metabolite (each row) and save the p values myp <- c() for (i in 1:n_metab) { onemetab <- mymat[i,] #i'th row, i'th metabolite # t test for the two groups result <- t.test(onemetab[mygroup == "Grp1"], onemetab[mygroup == "Grp2"]) myp <- c(myp, result$p.value) #and save the p value } # instead of using the for loop, could also do it with apply myp <- sapply(1:n_metab, function(i) { onemetab <- mymat[i,] result <- t.test(onemetab[mygroup == "Grp1"], onemetab[mygroup == "Grp2"]) result$p.value }) # plot the histogram. Should look "kind of" flat if the two # groups are not really different. If there is really a difference # between the two groups expect to see a peak near p=0. hist(myp) # and check with adjusted p values (correction for doing multiple comparisons) myp.adjusted <- p.adjust(myp, method="fdr") hist(myp.adjusted) # plot both the myp and myp.adjusted together par(mfrow=c(1,2)) hist(myp, ylim=c(0,20)) hist(myp.adjusted, ylim=c(0,20)) # And do again but now make some of the group2 metabolites different so # we should see those as significant sigRows <- 1:10 mymat.sig <- mymat mymat.sig[sigRows,mygroup == "Grp2"] <- 10 * mymat[sigRows,mygroup == "Grp2"] myp.sig <- sapply(1:n_metab, function(i) { onemetab <- mymat.sig[i,] result <- t.test(onemetab[mygroup == "Grp1"], onemetab[mygroup == "Grp2"]) result$p.value }) hist(myp.sig, ylim=c(0,20)) myp.sig.adjusted <- p.adjust(myp.sig, method="fdr") hist(myp.sig.adjusted, ylim=c(0,20)) which(myp.sig.adjusted < 0.05) (t-162.1152)/162.1152 * 1e6 = 10 * 162.1152 / 1e6