Updated Practical 3 Processing 16S rRNA amplicon data (markdown) authored by Ben Francis's avatar Ben Francis
......@@ -134,34 +134,40 @@ then do the plotting :)
library(RColorBrewer)
plotTaxon <- function(seqtab, taxtable, taxon, min_abund, taxonomicLevel) {
seqtab <- data.frame(seqtab)
colnames(seqtab) <- paste(taxtable[, 1], taxtable[, 2], taxtable[, 3], taxtable[, 4],
taxtable[, 5], taxtable[, 6], as.character(1:length(taxtable[, 1])), sep="-")
seqtab <- seqtab[, grep("Chloroplast|Mitochondria", colnames(seqtab), invert=TRUE)]
seqtab <- seqtab/rowSums(seqtab)
seqtab <- seqtab[, sapply(seqtab, function(x) max(x)) >= min_abund]
seqtab$sample <- rownames(seqtab)
seqtab.m <- melt(seqtab)
seqtab.m <- seqtab.m[grep(taxon, seqtab.m$variable),]
seqtab.m$variable <- gsub("NA", "uncultured", seqtab.m$variable)
seqtab.m$variable <- as.character(seqtab.m$variable)
seqtab.m$Domain <- sapply(strsplit(seqtab.m$variable, "-"), "[[", 1)
seqtab.m$Phylum <- sapply(strsplit(seqtab.m$variable, "-"), "[[", 2)
seqtab.m$Class <- sapply(strsplit(seqtab.m$variable, "-"), "[[", 3)
seqtab.m$Order <- sapply(strsplit(seqtab.m$variable, "-"), "[[", 4)
seqtab.m$Family <- sapply(strsplit(seqtab.m$variable, "-"), "[[", 5)
seqtab.m$Genus <- sapply(strsplit(seqtab.m$variable, "-"), "[[", 6)
seqtab.m <- cbind(seqtab.m, replicate(1,seqtab.m[colnames(seqtab.m) == taxonomicLevel]))
colnames(seqtab.m) <- c(colnames(seqtab.m)[1:length(colnames(seqtab.m)) -1], "displayLevel")
nb.cols <- length(seqtab) - 1
mycolours <- rep(brewer.pal(12, "Paired"), ceiling(nb.cols/12))
ggplot(seqtab.m) +
geom_bar(aes(x=sample, y=value, fill=displayLevel), stat="identity", position="stack") +
scale_fill_manual(values=mycolours) +
labs(x="Sample", y="Proportion of Reads", fill=taxonomicLevel) +
theme_classic() +
theme(text=element_text(family="Serif", size=16)) +
theme(axis.text.x=element_text(angle=45, hjust=1))
seqtab <- data.frame(seqtab)
colnames(seqtab) <- paste(taxtable[, 1], taxtable[, 2], taxtable[, 3], taxtable[, 4],
taxtable[, 5], taxtable[, 6], as.character(1:length(taxtable[, 1])), sep="-")
seqtab <- seqtab[, grep("Chloroplast|Mitochondria", colnames(seqtab), invert=TRUE)]
seqtab <- seqtab/rowSums(seqtab)
seqtab1 <- seqtab[, sapply(seqtab, function(x) max(x)) >= min_abund]
seqtab2 <- seqtab[, sapply(seqtab, function(x) max(x)) < min_abund]
colnames(seqtab2) <- paste0(colnames(seqtab2), "_Others")
seqtab3 <- merge(seqtab1, seqtab2, by="row.names")
row.names(seqtab3) <- row.names(seqtab)
seqtab <- seqtab3
seqtab$sample <- rownames(seqtab)
seqtab.m <- melt(seqtab)
seqtab.m <- seqtab.m[grep(taxon, seqtab.m$variable),]
seqtab.m$variable <- gsub("NA", "uncultured", seqtab.m$variable)
seqtab.m$variable <- gsub(".*Others", "Below_Min_Abund-Below_Min_Abund-Below_Min_Abund-Below_Min_Abund-Below_Min_Abund-Below_Min_Abund-Below_Min_Abund", seqtab.m$variable)
seqtab.m$variable <- as.character(seqtab.m$variable)
seqtab.m$Domain <- sapply(strsplit(seqtab.m$variable, "-"), "[[", 1)
seqtab.m$Phylum <- sapply(strsplit(seqtab.m$variable, "-"), "[[", 2)
seqtab.m$Class <- sapply(strsplit(seqtab.m$variable, "-"), "[[", 3)
seqtab.m$Order <- sapply(strsplit(seqtab.m$variable, "-"), "[[", 4)
seqtab.m$Family <- sapply(strsplit(seqtab.m$variable, "-"), "[[", 5)
seqtab.m$Genus <- sapply(strsplit(seqtab.m$variable, "-"), "[[", 6)
seqtab.m <- cbind(seqtab.m, replicate(1,seqtab.m[colnames(seqtab.m) == taxonomicLevel]))
colnames(seqtab.m) <- c(colnames(seqtab.m)[1:length(colnames(seqtab.m)) -1], "displayLevel")
nb.cols <- length(seqtab) - 1
mycolours <- rep(brewer.pal(12, "Paired"), ceiling(nb.cols/12))
ggplot(seqtab.m) +
geom_bar(aes(x=sample, y=value, fill=displayLevel), stat="identity", position="stack") +
scale_fill_manual(values=mycolours) +
labs(x="Sample", y="Proportion of Reads", fill=taxonomicLevel) +
theme_classic() +
theme(text=element_text(family="Serif", size=16)) +
theme(axis.text.x=element_text(angle=45, hjust=1))
}
So the bit above creates the function called `plotTaxon()`, which we can then use to generate the plots.
......
......