Update Practical 3 Processing 16S rRNA amplicon data authored by Taylor Priest's avatar Taylor Priest
......@@ -184,9 +184,10 @@ Now let's visualise the composition of our sample communities using basic barplo
physeq_herschel_rel <- transform_sample_counts(physeq_herschel, function(otu) otu/sum(out))
physeq_herschel_rel
First let's plot phylum-level composition
herschel_phylum_barplot <- plot_bar(physeq_herschel_rel, fill="Phylum") +
facet_grid(~Transect, scales="free_x") +
#scale_fill_manual(values=getPalette) +
guides(fill=guide_legend(ncol=1)) +
labs(y="Relative abundance", x="Sample") +
theme_bw() +
......@@ -198,11 +199,10 @@ facet_grid(~Transect, scales="free_x") +
strip.text.x = element_text(size=11,face="bold",colour="black"))
herschel_phylum_barplot
## Now at the Family level
Now at the Family level
herschel_family_barplot <- plot_bar(physeq_herschel_rel, fill="Family") +
facet_grid(~Transect, scales="free_x") +
#scale_fill_manual(values=getPalette) +
guides(fill=guide_legend(ncol=1)) +
labs(y="Relative abundance", x="Sample") +
theme_bw() +
......@@ -214,23 +214,21 @@ herschel_family_barplot <- plot_bar(physeq_herschel_rel, fill="Family") +
strip.text.x = element_text(size=11,face="bold",colour="black"))
herschel_family_barplot
## WAAAAYYY TOO MANY FAMILIES
## What should we do?
## This is common with microbial datasets and the best solution is to apply a
## threshold, to only show those that are at least x% in abundance
## Before we apply a threshold though, it is good to visualise the occurence
## of ASVs before determining a justifiable threshold
WAAAAYYY TOO MANY FAMILIES!!!!!
What should we do?
This is common with microbial datasets and the best solution is to apply a threshold, to only show those that are at least x% in abundance or occur in x number of samples, or both of these.
filtered = genefilter_sample(physeq_herschel_rel, filterfun_sample(function(x) x > 0.01), A=0.05*nsamples(physeq_herschel_rel))
filtered
filtered_taxa = prune_taxa(filtered, physeq_herschel_rel)
filtered_taxa
## Let's repeat the family level barplot with this filtered dataset
Let's repeat the family level barplot with this filtered dataset
herschel_filtered_family_barplot <- plot_bar(filtered_taxa, fill="Family") +
facet_grid(~Transect, scales="free_x") +
#scale_fill_manual(values=getPalette) +
guides(fill=guide_legend(ncol=1)) +
labs(y="Relative abundance", x="Sample") +
theme_bw() +
......@@ -242,7 +240,8 @@ herschel_filtered_family_barplot <- plot_bar(filtered_taxa, fill="Family") +
strip.text.x = element_text(size=11,face="bold",colour="black"))
herschel_filtered_family_barplot
## Hard to distinguish these colours, so let's try changing the colour scheme
This looks much better, but it is hard to distinguish between some of the colours. This is an inherent problem with barplots but we can try our best to make it a bit clearer.
##First we extract the data from the plot above
## Then we will assign colours to families and replot
## This time for the plotting we will use ggplot itself (which is what phyloseq was using above)
......@@ -257,7 +256,6 @@ herschel_filtered_family_barplot_refined <- ggplot(herschel_filtered_family_barp
aes(x=Sample, y=Abundance)) +
geom_bar(aes(fill=Family), stat="identity", position="stack") +
facet_grid(~Transect, scales="free_x") +
#scale_fill_manual(values=getPalette) +
guides(fill=guide_legend(ncol=1)) +
labs(y="Relative abundance", x="Sample") +
scale_fill_manual(values=colorRampPalette(brewer.pal(12,"Paired"))(colourCount)) +
......
......