Update Practical 3 Processing 16S rRNA amplicon data authored by Taylor Priest's avatar Taylor Priest
...@@ -184,9 +184,10 @@ Now let's visualise the composition of our sample communities using basic barplo ...@@ -184,9 +184,10 @@ Now let's visualise the composition of our sample communities using basic barplo
physeq_herschel_rel <- transform_sample_counts(physeq_herschel, function(otu) otu/sum(out)) physeq_herschel_rel <- transform_sample_counts(physeq_herschel, function(otu) otu/sum(out))
physeq_herschel_rel physeq_herschel_rel
First let's plot phylum-level composition
herschel_phylum_barplot <- plot_bar(physeq_herschel_rel, fill="Phylum") + herschel_phylum_barplot <- plot_bar(physeq_herschel_rel, fill="Phylum") +
facet_grid(~Transect, scales="free_x") + facet_grid(~Transect, scales="free_x") +
#scale_fill_manual(values=getPalette) +
guides(fill=guide_legend(ncol=1)) + guides(fill=guide_legend(ncol=1)) +
labs(y="Relative abundance", x="Sample") + labs(y="Relative abundance", x="Sample") +
theme_bw() + theme_bw() +
...@@ -198,11 +199,10 @@ facet_grid(~Transect, scales="free_x") + ...@@ -198,11 +199,10 @@ facet_grid(~Transect, scales="free_x") +
strip.text.x = element_text(size=11,face="bold",colour="black")) strip.text.x = element_text(size=11,face="bold",colour="black"))
herschel_phylum_barplot herschel_phylum_barplot
## Now at the Family level Now at the Family level
herschel_family_barplot <- plot_bar(physeq_herschel_rel, fill="Family") + herschel_family_barplot <- plot_bar(physeq_herschel_rel, fill="Family") +
facet_grid(~Transect, scales="free_x") + facet_grid(~Transect, scales="free_x") +
#scale_fill_manual(values=getPalette) +
guides(fill=guide_legend(ncol=1)) + guides(fill=guide_legend(ncol=1)) +
labs(y="Relative abundance", x="Sample") + labs(y="Relative abundance", x="Sample") +
theme_bw() + theme_bw() +
...@@ -214,23 +214,21 @@ herschel_family_barplot <- plot_bar(physeq_herschel_rel, fill="Family") + ...@@ -214,23 +214,21 @@ herschel_family_barplot <- plot_bar(physeq_herschel_rel, fill="Family") +
strip.text.x = element_text(size=11,face="bold",colour="black")) strip.text.x = element_text(size=11,face="bold",colour="black"))
herschel_family_barplot herschel_family_barplot
## WAAAAYYY TOO MANY FAMILIES WAAAAYYY TOO MANY FAMILIES!!!!!
## What should we do?
## This is common with microbial datasets and the best solution is to apply a What should we do?
## threshold, to only show those that are at least x% in abundance
## Before we apply a threshold though, it is good to visualise the occurence This is common with microbial datasets and the best solution is to apply a threshold, to only show those that are at least x% in abundance or occur in x number of samples, or both of these.
## of ASVs before determining a justifiable threshold
filtered = genefilter_sample(physeq_herschel_rel, filterfun_sample(function(x) x > 0.01), A=0.05*nsamples(physeq_herschel_rel)) filtered = genefilter_sample(physeq_herschel_rel, filterfun_sample(function(x) x > 0.01), A=0.05*nsamples(physeq_herschel_rel))
filtered filtered
filtered_taxa = prune_taxa(filtered, physeq_herschel_rel) filtered_taxa = prune_taxa(filtered, physeq_herschel_rel)
filtered_taxa filtered_taxa
## Let's repeat the family level barplot with this filtered dataset Let's repeat the family level barplot with this filtered dataset
herschel_filtered_family_barplot <- plot_bar(filtered_taxa, fill="Family") + herschel_filtered_family_barplot <- plot_bar(filtered_taxa, fill="Family") +
facet_grid(~Transect, scales="free_x") + facet_grid(~Transect, scales="free_x") +
#scale_fill_manual(values=getPalette) +
guides(fill=guide_legend(ncol=1)) + guides(fill=guide_legend(ncol=1)) +
labs(y="Relative abundance", x="Sample") + labs(y="Relative abundance", x="Sample") +
theme_bw() + theme_bw() +
...@@ -242,7 +240,8 @@ herschel_filtered_family_barplot <- plot_bar(filtered_taxa, fill="Family") + ...@@ -242,7 +240,8 @@ herschel_filtered_family_barplot <- plot_bar(filtered_taxa, fill="Family") +
strip.text.x = element_text(size=11,face="bold",colour="black")) strip.text.x = element_text(size=11,face="bold",colour="black"))
herschel_filtered_family_barplot herschel_filtered_family_barplot
## Hard to distinguish these colours, so let's try changing the colour scheme This looks much better, but it is hard to distinguish between some of the colours. This is an inherent problem with barplots but we can try our best to make it a bit clearer.
##First we extract the data from the plot above ##First we extract the data from the plot above
## Then we will assign colours to families and replot ## Then we will assign colours to families and replot
## This time for the plotting we will use ggplot itself (which is what phyloseq was using above) ## This time for the plotting we will use ggplot itself (which is what phyloseq was using above)
...@@ -257,7 +256,6 @@ herschel_filtered_family_barplot_refined <- ggplot(herschel_filtered_family_barp ...@@ -257,7 +256,6 @@ herschel_filtered_family_barplot_refined <- ggplot(herschel_filtered_family_barp
aes(x=Sample, y=Abundance)) + aes(x=Sample, y=Abundance)) +
geom_bar(aes(fill=Family), stat="identity", position="stack") + geom_bar(aes(fill=Family), stat="identity", position="stack") +
facet_grid(~Transect, scales="free_x") + facet_grid(~Transect, scales="free_x") +
#scale_fill_manual(values=getPalette) +
guides(fill=guide_legend(ncol=1)) + guides(fill=guide_legend(ncol=1)) +
labs(y="Relative abundance", x="Sample") + labs(y="Relative abundance", x="Sample") +
scale_fill_manual(values=colorRampPalette(brewer.pal(12,"Paired"))(colourCount)) + scale_fill_manual(values=colorRampPalette(brewer.pal(12,"Paired"))(colourCount)) +
... ...
......