METATETARD

Phylum abundance analysis of OTUs

on 16S rRNA RNA data

XPDIET

# Analysis using phyloseq
library(phyloseq)
library(ggplot2)
library(phytools)
library(dplyr)
library(gridExtra)
library(picante)
## Folders, Themes, colors
source("prelude.R")

# Setting up directories
data_dir <-paste0(data_dir_path,"xpdiet")
output_dir <- paste0(output_dir_path,"Figure5_diet")

####################################################
# Figure barplot proportion of most abundant phyla #
####################################################
# Importing data
# Defining the directory containing the data to import
setwd(data_dir)

# Working on abundance data from 100 rarefactions
comm<-read.table("xpdiet_fichier_allcom.txt", header=TRUE, row.names=1)
# Getting rid of OTUs with zero abundance
comm<-comm %>% select_if((function(col) is.numeric(col) && sum(col) > 0))
comm<-t(comm)

#Importing metadata
metadata<-read.csv("nmt3_2_metadata_food.csv",sep=";",header=TRUE,row.names=1)
taxmat<-read.csv("nmt3_Gal18_abondance_tax.csv",sep=";",header=TRUE,row.names=1)
taxmat<-as.matrix(taxmat)
OTU<-otu_table(comm, taxa_are_rows = TRUE)
TAX<-tax_table(taxmat)
sample_data<-sample_data(metadata)

#Importing phylogeny
phy<-read.tree("nmt3_sequences_mpr.tree")
#Rooting the tree using midpoint rooting
phy<-midpoint.root(phy)
physeq.ini<-merge_phyloseq(OTU,sample_data,TAX,phy)

#2 - We fuse taxa at the phylum level
physeq.ini.Rg<-tax_glom(physeq.ini, taxrank="Phylum")

#3 - Transforming abundance counts into proportions
physeq.ini.Rgm = transform_sample_counts(physeq.ini.Rg, function(x) 100 * x/sum(x))

#4 - Filtering OTUs
#We keep OTUs with at least 0.5% relative abundance in a given sample
physeq.ini.Rfgm = filter_taxa(physeq.ini.Rgm,function(x) mean(x)>0.5,TRUE)

#5 - We merge samples corresponding to the same Developmental Stage category
# Transform all variables to factors for merging (trick required for R4.0)
df <- as.data.frame(lapply(sample_data(physeq.ini.Rfgm),function (y) if(class(y)!="factor" ) as.factor(y) else y),stringsAsFactors=T)
row.names(df) <- sample_names(physeq.ini.Rfgm)
sample_data(physeq.ini.Rfgm) <- sample_data(df)
#Merging
physeq.ini.Rfgm2 = merge_samples(physeq.ini.Rfgm, "Class")

#6 - We recompute the relative abundance because the sample merge sums up the relative abundance of each merged sample
physeq.ini.Rf2gm2 = transform_sample_counts(physeq.ini.Rfgm, function(x) 100 * x/sum(x))

# We need to recover the good legends corresponding to the category used for merging samples
sample_data(physeq.ini.Rf2gm2)$Class <- levels(sample_data(physeq.ini.Rg)$Class)


#7 - We can go on with the abundance plot itself
# Defining the output dir
setwd(output_dir)
# Plotting

pdf("plot_stade_food_compo.pdf",width=7,height=5)
plot_stade_food_compo<-plot_bar(physeq.ini.Rf2gm2, "ID", fill = "Phylum")+ ylab("Percentage of Sequences")+ scale_fill_manual(values=jcolors)+theme_nb()+theme(axis.text.x = element_text(angle = 0,hjust=0.5))+scale_x_discrete(limits=c("MT-D2-9","MT-D2-8","MT-D2-7","MT-D2-6","MT-D2-5","MT-D2-4","MT-D2-3","MT-D2-2","MT-D2-1","MT-D1-10","MT-D1-9","MT-D1-7","MT-D1-6","MT-D1-5","MT-D1-4","MT-D1-3","MT-D1-2","MT-D1-1"), labels=c("D2-9","D2-8","D2-7","D2-6","D2-5","D2-4","D2-3","D2-2","D2-1","D1-10","D1-9","D1-7","D1-6","D1-5","D1-4","D1-3","D1-2","D1-1"))+theme(legend.position="top")+coord_flip()
plot_stade_food_compo
print(plot_stade_food_compo)
dev.off()
## quartz_off_screen 
##                 2
#######################
#   Ordination plots  #
#######################

#Ordination using unifrac and Principal Coordinate Analysis (PCoA)
# We filter OTUs with abundance of more than 0.001% and convert into relative abundance

#Setting the working directory for output files
setwd(output_dir)

# We keep OTUs seen more than 5 times in 5% of samples
MTD <-  filter_taxa(physeq.ini, function(x) sum(x > 5) > (0.05*length(x)), TRUE)

# How many OTUs were initially found ?
length(row.names(otu_table(physeq.ini)))
## [1] 627
# We found 627 OTUs in this tadpole's gut RNA analysis
# How many OTUs have been filtered ?
length(row.names(otu_table(MTD)))
## [1] 627
# Il y a 627 OTUs sur 627 qui passent le filtre d'abondance et de prévalence.

# We filter OTUs with abundance of more than 0.001% and convert into relative abundance
count2proportion <- function(x){
    x[(x / sum(x)) < (1e-5)] <- 0
    return(x)
}
MTD<-transform_sample_counts(MTD,fun=count2proportion)
#Ordination using weighted unifrac and Principal Coordinate Analysis (PCoA)
ordu<-ordinate(MTD, "PCoA", "unifrac", weighted=TRUE)
#Plotting the ordination results
pw<-plot_ordination(MTD, ordu, shape="Class")
plotw<-pw+geom_point(size=3,alpha=0.5,fill="grey")+scale_shape_manual(values=c(15,0,16,1,6))+labs(shape="Class")
plotw<-plotw+ggtitle("Community structure")+theme_npgray()
plotw<-plotw + stat_ellipse(geom = "polygon", type="norm", alpha=0.4, aes(fill=Class),show.legend = F) + stat_ellipse(type="norm",linetype=2)+theme(legend.position="top")
plotw

ggsave("MDSPCOA_weighted_UNIFRAC_2.pdf",width=8,height=6)
#Removing the legend for the next plot with grid arrange
plotw<-plotw+guides(shape=FALSE)

#Ordination using unweighted unifrac and Principal Coordinate Analysis (PCoA)
ordub<-ordinate(MTD, "PCoA", "unifrac", weighted=FALSE)
#Plotting the ordination results
puw<-plot_ordination(MTD, ordub, shape="Class")
plotuw<-puw+geom_point(size=3,alpha=0.5,fill="grey")+scale_shape_manual(values=c(15,0,16,1,6))+labs(shape="Class")
plotuw<-plotuw+ggtitle("Community membership")+theme_npgray()
plotuw<-plotuw + stat_ellipse(geom = "polygon", type="norm", alpha=0.4, aes(fill=Class),show.legend = F) + stat_ellipse(type="norm",linetype=2)+theme(legend.position="top")
plotuw

ggsave("MDSPCOA_unweighted_UNIFRAC_2.pdf",width=8,height=6)
#Removing the legend for the next plot with grid arrange
plotuw<-plotuw+guides(shape=FALSE)

# Side by side plot for the figure
grid.arrange(plotuw, plotw, ncol=2)

#Saving plot to file
pdf("plotMDSPCOA_wuw_2.pdf",width=12,height=6)
print(grid.arrange(plotuw, plotw, ncol=2))
## TableGrob (1 x 2) "arrange": 2 grobs
##   z     cells    name           grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (1-1,2-2) arrange gtable[layout]
dev.off()
## quartz_off_screen 
##                 2
# Test the significance of unifrac distances

# beta diversity: compute unweighted unifrac distance
uudist<-distance(physeq.ini,method="uunifrac")
adonis(uudist~Class,data=metadata)
## 
## Call:
## adonis(formula = uudist ~ Class, data = metadata) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs  MeanSqs F.Model      R2 Pr(>F)    
## Class      1   0.22573 0.225728  5.8935 0.26919  0.001 ***
## Residuals 16   0.61282 0.038301         0.73081           
## Total     17   0.83855                  1.00000           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# beta diversity: compute weighted unifrac distance
uwdist<-distance(physeq.ini,method="wunifrac")
adonis(uwdist~Class,data=metadata)
## 
## Call:
## adonis(formula = uwdist ~ Class, data = metadata) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)    
## Class      1   0.52008 0.52008   24.19 0.60189  0.001 ***
## Residuals 16   0.34400 0.02150         0.39811           
## Total     17   0.86408                 1.00000           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
###########################################
# Comparing distances in ordination plots #
###########################################
#Loading the required function from phyloseq_extended
source("https://raw.githubusercontent.com/mahendra-mariadassou/phyloseq-extended/master/R/graphical_methods.R")
## Loading required package: scales
## Loading required package: reshape2
p1 <- plot_samples(MTD, ordinate(MTD, "MDS", "cc"), color = "Class") + stat_ellipse(type="norm",linetype=2) + ggtitle("MDS + Jaccard") +guides(color=FALSE)
p2 <- plot_samples(MTD, ordinate(MTD, "MDS", "unifrac"), color = "Class") + stat_ellipse(type="norm",linetype=2) + ggtitle("MDS + UniFrac") +guides(color=FALSE)
p3 <- plot_samples(MTD, ordinate(MTD, "MDS", "bray"), color = "Class") + stat_ellipse(type="norm",linetype=2) + ggtitle("MDS + Bray-Curtis") +guides(color=FALSE)
p4 <- plot_samples(MTD, ordinate(MTD, "MDS", "wunifrac"), color = "Class") + stat_ellipse(type="norm",linetype=2) + ggtitle("MDS + weighted UniFrac") +guides(color=FALSE)
grid.arrange(p1, p2, p3, p4, ncol = 2, nrow = 2)

PREVALENCE

library(microbiome)
library(RColorBrewer)
physeq.rel <- microbiome::transform(physeq.ini, "compositional")
physeq1.core <- core(physeq.rel, detection = 0.001, prevalence = .5)
prevalences <- seq(.05, 1, .05)
detections <- 10^seq(log10(1e-3), log10(.2), length = 20)

plot_core(physeq.rel, plot.type = "heatmap", prevalences = prevalences, detections = detections, min.prevalence = .5,colours = rev(brewer.pal(5, "Spectral")),)+theme_npgray()+scale_x_discrete(breaks=detections,labels=c("0.001","","","","","","0.005","","","","","","","","0.05","","","","","0.20"),name="Detection threshold in % of relative abundance")