Updated Practical 3 Processing 16S rRNA amplicon data (markdown), authored by Ben Francis
...@@ -134,34 +134,40 @@ then do the plotting :) ...@@ -134,34 +134,40 @@ then do the plotting :)
library(RColorBrewer) library(RColorBrewer)
# Plot the relative abundance of a taxon as a stacked bar chart, one bar per
# sample.
#
# Arguments:
#   seqtab         Sample-by-ASV count table (samples in rows, ASVs in
#                  columns); anything data.frame() accepts.
#   taxtable       Taxonomy table with one row per ASV column of `seqtab`, in
#                  the same order; columns 1-6 are read as Domain, Phylum,
#                  Class, Order, Family and Genus.
#   taxon          Regular expression matched against each ASV's lineage label
#                  ("Domain-Phylum-Class-Order-Family-Genus-<index>", with
#                  missing ranks spelled "NA"); only matching ASVs are plotted.
#   min_abund      ASVs whose highest per-sample proportion is below this value
#                  are pooled under the label "Below_Min_Abund".
#   taxonomicLevel Rank used to colour the bars: one of "Domain", "Phylum",
#                  "Class", "Order", "Family" or "Genus".
#
# Returns a ggplot object. Requires ggplot2 and RColorBrewer to be attached.
plotTaxon <- function(seqtab, taxtable, taxon, min_abund, taxonomicLevel) {
  rank_names <- c("Domain", "Phylum", "Class", "Order", "Family", "Genus")
  if (!is.character(taxonomicLevel) || length(taxonomicLevel) != 1L ||
      !taxonomicLevel %in% rank_names) {
    stop("taxonomicLevel must be one of: ",
         paste(rank_names, collapse = ", "), call. = FALSE)
  }

  counts <- data.frame(seqtab)
  if (ncol(counts) != nrow(taxtable)) {
    stop("taxtable must have one row per column of seqtab", call. = FALSE)
  }

  # Keep the ranks as a character matrix so they never have to be recovered by
  # splitting a pasted label (taxon names may themselves contain "-").
  ranks <- as.matrix(taxtable[, seq_along(rank_names), drop = FALSE])
  colnames(ranks) <- rank_names

  # Lineage label per ASV; the trailing index keeps labels unique.
  labels <- paste(ranks[, 1], ranks[, 2], ranks[, 3], ranks[, 4],
                  ranks[, 5], ranks[, 6], seq_len(nrow(ranks)), sep = "-")

  # Organelle sequences are removed before proportions are calculated.
  keep <- !grepl("Chloroplast|Mitochondria", labels)
  counts <- counts[, keep, drop = FALSE]
  ranks <- ranks[keep, , drop = FALSE]
  labels <- labels[keep]

  # A sample with no reads left would give NaN proportions; drop it loudly.
  totals <- rowSums(counts)
  if (any(totals == 0)) {
    warning("Dropping samples with no reads after filtering: ",
            paste(rownames(counts)[totals == 0], collapse = ", "),
            call. = FALSE)
    counts <- counts[totals > 0, , drop = FALSE]
    totals <- totals[totals > 0]
  }
  if (nrow(counts) == 0L) {
    stop("No samples with reads remain after filtering", call. = FALSE)
  }
  props <- counts / totals

  # Missing ranks are shown as "uncultured"; ASVs that never reach min_abund
  # in any sample are pooled under a single label at every rank.
  ranks[is.na(ranks) | ranks == "NA"] <- "uncultured"
  is_low <- vapply(props, max, numeric(1)) < min_abund
  ranks[is_low, ] <- "Below_Min_Abund"

  selected <- grep(taxon, labels)
  if (length(selected) == 0L) {
    stop("No taxa match the pattern '", taxon, "'", call. = FALSE)
  }
  props <- props[, selected, drop = FALSE]

  # Long format, column by column: every sample for ASV 1, then ASV 2, ...
  plot_data <- data.frame(
    sample = rep(rownames(props), times = ncol(props)),
    value = unlist(props, use.names = FALSE),
    displayLevel = rep(ranks[selected, taxonomicLevel], each = nrow(props)),
    stringsAsFactors = FALSE
  )

  # Recycle the 12-colour palette so every displayed group gets a colour.
  n_groups <- length(unique(plot_data$displayLevel))
  mycolours <- rep(brewer.pal(12, "Paired"), length.out = n_groups)

  ggplot(plot_data) +
    geom_col(aes(x = sample, y = value, fill = displayLevel),
             position = "stack") +
    scale_fill_manual(values = mycolours) +
    labs(x = "Sample", y = "Proportion of Reads", fill = taxonomicLevel) +
    theme_classic() +
    # "serif" is the device-independent family name understood by R devices.
    theme(text = element_text(family = "serif", size = 16)) +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))
}
So the bit above creates the function called `plotTaxon()`, which we can then use to generate the plots.
... ...
......