Update Practical 3 Processing 16S rRNA amplicon data authored by Taylor Priest's avatar Taylor Priest
......@@ -184,90 +184,88 @@ Now let's visualise the composition of our sample communities using basic barplo
physeq_herschel_rel <- transform_sample_counts(physeq_herschel, function(otu) otu/sum(out))
physeq_herschel_rel
First let's plot phylum-level composition
herschel_phylum_barplot <- plot_bar(physeq_herschel_rel, fill="Phylum") +
facet_grid(~Transect, scales="free_x") +
#scale_fill_manual(values=getPalette) +
guides(fill=guide_legend(ncol=1)) +
labs(y="Relative abundance", x="Sample") +
theme_bw() +
theme(axis.title.x = element_text(size=12,face="bold",colour="black"),
facet_grid(~Transect, scales="free_x") +
guides(fill=guide_legend(ncol=1)) +
labs(y="Relative abundance", x="Sample") +
theme_bw() +
theme(axis.title.x = element_text(size=12,face="bold",colour="black"),
axis.title.y = element_text(size=12,face="bold",colour="black"),
axis.text.x = element_text(size=10, colour="black", angle=45, hjust=1),
legend.title = element_text(size=12,face="bold",colour="black"),
strip.background.x = element_rect(fill="white"),
strip.text.x = element_text(size=11,face="bold",colour="black"))
herschel_phylum_barplot
herschel_phylum_barplot
## Now at the Family level
Now at the Family level
herschel_family_barplot <- plot_bar(physeq_herschel_rel, fill="Family") +
facet_grid(~Transect, scales="free_x") +
#scale_fill_manual(values=getPalette) +
guides(fill=guide_legend(ncol=1)) +
labs(y="Relative abundance", x="Sample") +
theme_bw() +
theme(axis.title.x = element_text(size=12,face="bold",colour="black"),
herschel_family_barplot <- plot_bar(physeq_herschel_rel, fill="Family") +
facet_grid(~Transect, scales="free_x") +
guides(fill=guide_legend(ncol=1)) +
labs(y="Relative abundance", x="Sample") +
theme_bw() +
theme(axis.title.x = element_text(size=12,face="bold",colour="black"),
axis.title.y = element_text(size=12,face="bold",colour="black"),
axis.text.x = element_text(size=10, colour="black", angle=45, hjust=1),
legend.title = element_text(size=12,face="bold",colour="black"),
strip.background.x = element_rect(fill="white"),
strip.text.x = element_text(size=11,face="bold",colour="black"))
herschel_family_barplot
## WAAAAYYY TOO MANY FAMILIES
## What should we do?
## This is common with microbial datasets and the best solution is to apply a
## threshold, to only show those that are at least x% in abundance
## Before we apply a threshold though, it is good to visualise the occurence
## of ASVs before determining a justifiable threshold
filtered = genefilter_sample(physeq_herschel_rel, filterfun_sample(function(x) x > 0.01), A=0.05*nsamples(physeq_herschel_rel))
filtered
filtered_taxa = prune_taxa(filtered, physeq_herschel_rel)
filtered_taxa
## Let's repeat the family level barplot with this filtered dataset
herschel_filtered_family_barplot <- plot_bar(filtered_taxa, fill="Family") +
facet_grid(~Transect, scales="free_x") +
#scale_fill_manual(values=getPalette) +
guides(fill=guide_legend(ncol=1)) +
labs(y="Relative abundance", x="Sample") +
theme_bw() +
theme(axis.title.x = element_text(size=12,face="bold",colour="black"),
herschel_family_barplot
WAAAAYYY TOO MANY FAMILIES!!!!!
What should we do?
This is common with microbial datasets and the best solution is to apply a threshold, to only show those that are at least x% in abundance or occur in x number of samples, or both of these.
filtered = genefilter_sample(physeq_herschel_rel, filterfun_sample(function(x) x > 0.01), A=0.05*nsamples(physeq_herschel_rel))
filtered
filtered_taxa = prune_taxa(filtered, physeq_herschel_rel)
filtered_taxa
Let's repeat the family level barplot with this filtered dataset
herschel_filtered_family_barplot <- plot_bar(filtered_taxa, fill="Family") +
facet_grid(~Transect, scales="free_x") +
guides(fill=guide_legend(ncol=1)) +
labs(y="Relative abundance", x="Sample") +
theme_bw() +
theme(axis.title.x = element_text(size=12,face="bold",colour="black"),
axis.title.y = element_text(size=12,face="bold",colour="black"),
axis.text.x = element_text(size=10, colour="black", angle=45, hjust=1),
legend.title = element_text(size=12,face="bold",colour="black"),
strip.background.x = element_rect(fill="white"),
strip.text.x = element_text(size=11,face="bold",colour="black"))
herschel_filtered_family_barplot
herschel_filtered_family_barplot
This looks much better, but it is hard to distinguish between some of the colours. This is an inherent problem with barplots but we can try our best to make it a bit clearer.
## Hard to distinguish these colours, so let's try changing the colour scheme
## First we extract the data from the plot above
## Then we will assign colours to families and replot
## This time for the plotting we will use ggplot itself (which is what phyloseq was using above)
##First we extract the data from the plot above
## Then we will assign colours to families and replot
## This time for the plotting we will use ggplot itself (which is what phyloseq was using above)
herschel_filtered_family_barplot_data <- herschel_filtered_family_barplot$data
herschel_filtered_family_barplot_data <- herschel_filtered_family_barplot$data
View(herschel_filtered_family_barplot_data)
herschel_filtered_family_barplot_data[is.na(herschel_filtered_family_barplot_data)] <- "Unknown"
herschel_filtered_family_barplot_data[is.na(herschel_filtered_family_barplot_data)] <- "Unknown"
colourCount=length(unique(herschel_filtered_family_barplot_data$Family))
colourCount=length(unique(herschel_filtered_family_barplot_data$Family))
herschel_filtered_family_barplot_refined <- ggplot(herschel_filtered_family_barplot_data,
herschel_filtered_family_barplot_refined <- ggplot(herschel_filtered_family_barplot_data,
aes(x=Sample, y=Abundance)) +
geom_bar(aes(fill=Family), stat="identity", position="stack") +
facet_grid(~Transect, scales="free_x") +
#scale_fill_manual(values=getPalette) +
guides(fill=guide_legend(ncol=1)) +
labs(y="Relative abundance", x="Sample") +
scale_fill_manual(values=colorRampPalette(brewer.pal(12,"Paired"))(colourCount)) +
theme_bw() +
theme(axis.title.x = element_text(size=12,face="bold",colour="black"),
geom_bar(aes(fill=Family), stat="identity", position="stack") +
facet_grid(~Transect, scales="free_x") +
guides(fill=guide_legend(ncol=1)) +
labs(y="Relative abundance", x="Sample") +
scale_fill_manual(values=colorRampPalette(brewer.pal(12,"Paired"))(colourCount)) +
theme_bw() +
theme(axis.title.x = element_text(size=12,face="bold",colour="black"),
axis.title.y = element_text(size=12,face="bold",colour="black"),
axis.text.x = element_text(size=10, colour="black", angle=45, hjust=1),
legend.title = element_text(size=12,face="bold",colour="black"),
strip.background.x = element_rect(fill="white"),
strip.text.x = element_text(size=11,face="bold",colour="black"))
herschel_filtered_family_barplot_refined
herschel_filtered_family_barplot_refined