Updated Practical 3 Processing 16S rRNA amplicon data (markdown) authored by Ben Francis's avatar Ben Francis
...@@ -129,46 +129,48 @@ load the data: ...@@ -129,46 +129,48 @@ load the data:
then do the plotting :) then do the plotting :)
library(ggplot2) ```
library(reshape2) library(ggplot2)
library(RColorBrewer) library(reshape2)
library(RColorBrewer)
plotTaxon <- function(seqtab, taxtable, taxon, min_abund, taxonomicLevel) { plotTaxon <- function(seqtab, taxtable, taxon, min_abund, taxonomicLevel) {
seqtab <- data.frame(seqtab) seqtab <- data.frame(seqtab)
colnames(seqtab) <- paste(taxtable[, 1], taxtable[, 2], taxtable[, 3], taxtable[, 4], colnames(seqtab) <- paste(taxtable[, 1], taxtable[, 2], taxtable[, 3], taxtable[, 4],
taxtable[, 5], taxtable[, 6], as.character(1:length(taxtable[, 1])), sep="-") taxtable[, 5], taxtable[, 6], as.character(1:length(taxtable[, 1])), sep="-")
seqtab <- seqtab[, grep("Chloroplast|Mitochondria", colnames(seqtab), invert=TRUE)] seqtab <- seqtab[, grep("Chloroplast|Mitochondria", colnames(seqtab), invert=TRUE)]
seqtab <- seqtab/rowSums(seqtab) seqtab <- seqtab/rowSums(seqtab)
seqtab1 <- seqtab[, sapply(seqtab, function(x) max(x)) >= min_abund] seqtab1 <- seqtab[, sapply(seqtab, function(x) max(x)) >= min_abund]
seqtab2 <- seqtab[, sapply(seqtab, function(x) max(x)) < min_abund] seqtab2 <- seqtab[, sapply(seqtab, function(x) max(x)) < min_abund]
colnames(seqtab2) <- paste0(colnames(seqtab2), "_Others") colnames(seqtab2) <- paste0(colnames(seqtab2), "_Others")
seqtab3 <- merge(seqtab1, seqtab2, by="row.names") seqtab3 <- merge(seqtab1, seqtab2, by="row.names")
row.names(seqtab3) <- row.names(seqtab) row.names(seqtab3) <- row.names(seqtab)
seqtab <- seqtab3 seqtab <- seqtab3
seqtab$sample <- rownames(seqtab) seqtab$sample <- rownames(seqtab)
seqtab.m <- melt(seqtab) seqtab.m <- melt(seqtab)
seqtab.m <- seqtab.m[grep(taxon, seqtab.m$variable),] seqtab.m <- seqtab.m[grep(taxon, seqtab.m$variable),]
seqtab.m$variable <- gsub("NA", "uncultured", seqtab.m$variable) seqtab.m$variable <- gsub("NA", "uncultured", seqtab.m$variable)
seqtab.m$variable <- gsub(".*Others", "Below_Min_Abund-Below_Min_Abund-Below_Min_Abund-Below_Min_Abund-Below_Min_Abund-Below_Min_Abund-Below_Min_Abund", seqtab.m$variable) seqtab.m$variable <- gsub(".*Others", "Below_Min_Abund-Below_Min_Abund-Below_Min_Abund-Below_Min_Abund-Below_Min_Abund-Below_Min_Abund-Below_Min_Abund", seqtab.m$variable)
seqtab.m$variable <- as.character(seqtab.m$variable) seqtab.m$variable <- as.character(seqtab.m$variable
seqtab.m$Domain <- sapply(strsplit(seqtab.m$variable, "-"), "[[", 1) seqtab.m$Domain <- sapply(strsplit(seqtab.m$variable, "-"), "[[", 1)
seqtab.m$Phylum <- sapply(strsplit(seqtab.m$variable, "-"), "[[", 2) seqtab.m$Phylum <- sapply(strsplit(seqtab.m$variable, "-"), "[[", 2)
seqtab.m$Class <- sapply(strsplit(seqtab.m$variable, "-"), "[[", 3) seqtab.m$Class <- sapply(strsplit(seqtab.m$variable, "-"), "[[", 3)
seqtab.m$Order <- sapply(strsplit(seqtab.m$variable, "-"), "[[", 4) seqtab.m$Order <- sapply(strsplit(seqtab.m$variable, "-"), "[[", 4)
seqtab.m$Family <- sapply(strsplit(seqtab.m$variable, "-"), "[[", 5) seqtab.m$Family <- sapply(strsplit(seqtab.m$variable, "-"), "[[", 5)
seqtab.m$Genus <- sapply(strsplit(seqtab.m$variable, "-"), "[[", 6) seqtab.m$Genus <- sapply(strsplit(seqtab.m$variable, "-"), "[[", 6)
seqtab.m <- cbind(seqtab.m, replicate(1,seqtab.m[colnames(seqtab.m) == taxonomicLevel])) seqtab.m <- cbind(seqtab.m, replicate(1,seqtab.m[colnames(seqtab.m) == taxonomicLevel]))
colnames(seqtab.m) <- c(colnames(seqtab.m)[1:length(colnames(seqtab.m)) -1], "displayLevel") colnames(seqtab.m) <- c(colnames(seqtab.m)[1:length(colnames(seqtab.m)) -1], "displayLevel")
nb.cols <- length(seqtab) - 1 nb.cols <- length(seqtab) - 1
mycolours <- rep(brewer.pal(12, "Paired"), ceiling(nb.cols/12)) mycolours <- rep(brewer.pal(12, "Paired"), ceiling(nb.cols/12))
ggplot(seqtab.m) + ggplot(seqtab.m) +
geom_bar(aes(x=sample, y=value, fill=displayLevel), stat="identity", position="stack") + geom_bar(aes(x=sample, y=value, fill=displayLevel), stat="identity", position="stack") +
scale_fill_manual(values=mycolours) + scale_fill_manual(values=mycolours) +
labs(x="Sample", y="Proportion of Reads", fill=taxonomicLevel) + labs(x="Sample", y="Proportion of Reads", fill=taxonomicLevel) +
theme_classic() + theme_classic() +
theme(text=element_text(family="Serif", size=16)) + theme(text=element_text(family="Serif", size=16)) +
theme(axis.text.x=element_text(angle=45, hjust=1)) theme(axis.text.x=element_text(angle=45, hjust=1))
} }
```
So the bit above creates the function called `plotTaxon()`, which we can then use to generate the plots. So the bit above creates the function called `plotTaxon()`, which we can then use to generate the plots.
... ...
......