on 16S rRNA RNA data

# Turning off message and warning for rendering
#Loading packages
library(picante)
library(ggplot2)
library(phytools)
library(dplyr)
library(grid)
library(gridExtra)
library(ggstatsplot)

## Folders, Themes, colors
source("prelude.R")

# Setting up directories
data_dir <-paste0(data_dir_path,"xplate")
output_dir <- paste0(output_dir_path,"Figure_late")

# Defining the directory containing the data to import
setwd(data_dir)

### Rarefaction analysis : importing data
comm_abundance_1<-read.table("nmt3_abondance_early_tpn_notax.csv",sep=";",header=TRUE,row.names=1)

# Defining the output dir
setwd(output_dir)

# Getting rid of OTUs with zero abundance
comm_abundance_1<-comm_abundance_1 %>% select_if((function(col) is.numeric(col) && sum(col) > 0))
S<-specnumber(comm_abundance_1)
raremax <- min(rowSums(comm_abundance_1))
Srare <- rarefy(comm_abundance_1, raremax)
#Rarefaction
plot(S, Srare, xlab = "Observed No. of Species", ylab = "Rarefied No. of Species")
abline(0, 1)

# Saving plot as a pdf file
#pdf("plot_rarefaction1_rna.pdf",width=5,height=4)
#print(plot(S, Srare, xlab = "Observed No. of Species", ylab = "Rarefied No. of Species"))
#print(abline(0, 1))
#dev.off()
# Rarefaction curve
rarecurve(comm_abundance_1, step = 20, sample = raremax, col = "blue", cex = 0.6)

# Saving plot as a pdf file
#pdf("plot_rarefaction2_rna.pdf",width=5,height=4)
#print(rarecurve(comm_abundance_1, step = 20, sample = raremax, col = "blue", cex = 0.6))
#dev.off()


###Diversity analysis
# Defining the directory containing the data to import
setwd(data_dir)
# Working on abundance data from 100 rarefactions
allcom<-read.table("xplate_fichier_allcom.txt", header=TRUE, row.names=1)
# Getting rid of OTUs with zero abundance
allcom<-allcom %>% select_if((function(col) is.numeric(col) && sum(col) > 0))
commRS<-decostand(allcom, method="total")
comm<-commRS

#Importing metadata
metadata<-read.csv("nmt3_metadata_late.csv",sep=";",header=TRUE,row.names=1)
metadata$DevelopmentalStage=as.factor(metadata$DevelopmentalStage)
#Importing phylogeny
phy<-read.tree("nmt3_sequences_mpr.tree")
#Rooting the tree using midpoint rooting
phy<-midpoint.root(phy)
#Getting rid of taxa that are missing in the studied samples
phy<-prune.sample(comm, phy)
#Combining the input data
combined <- match.phylo.comm(phy, comm)
phy <- combined$phy
comm <- combined$comm
metadata <- metadata[rownames(comm), ]
#Checking the index
all.equal(rownames(comm), rownames(metadata))

## [1] TRUE

#Checking the normalisation
rowSums(comm)

##  SD-SL-iNF52  SD-SL-iNF56  SD-SL-iNF60  SD-SL-iNF63  SD-SL-iNF66 SD-TGA-iNF52 
##            1            1            1            1            1            1 
## SD-TGA-iNF56 SD-TGA-iNF60 SD-TGA-iNF63 SD-TGA-iNF66 
##            1            1            1            1

#######################################
# Analysis of taxonomic diversity     #
#######################################
# Defining the output dir
setwd(output_dir)

# For the boxplot with ggplot2
df<-data.frame(Nb.OTU= specnumber(comm),DevelopmentalStage=metadata$DevelopmentalStage,Population=metadata$Population)
ggplot(df,aes(x=DevelopmentalStage,y=Nb.OTU))+geom_bar(position="dodge",stat="identity")+theme_npgray()+scale_fill_grey()+scale_x_discrete(limits=c("NF52","NF56","NF60","NF63","NF66"))+xlab("Developmental stage")+facet_grid(~Population)

ggsave("Plot_stage_early_1.pdf",width=10,height=13,units="cm")

#Analysis of phylogenetic diversity
comm.pd <- pd(comm, phy)
# For the boxplot with ggplot2
df2<-data.frame(Faith.PD= comm.pd$PD,DevelopmentalStage=metadata$DevelopmentalStage,Population=metadata$Population)
ggplot(df2,aes(x=DevelopmentalStage,y=Faith.PD))+geom_bar(position="dodge",stat="identity")+theme_npgray()+scale_fill_grey()+scale_x_discrete(limits=c("NF52","NF56","NF60","NF63","NF66"))+xlab("Developmental stage")+facet_grid(~Population)+xlab("Developmental stage")

ggsave("Plot_stage_early_2.pdf",width=10,height=13,units="cm")


# Shannon index
comm.H<-diversity(comm, index="shannon", base=exp(1)) 
dfH<-data.frame(H=comm.H,DevelopmentalStage=metadata$DevelopmentalStage,Population=metadata$Population)
plotH<-ggplot(dfH,aes(x=DevelopmentalStage,y=H,fill=Population))+geom_bar(position="dodge",stat="identity")+scale_fill_grey()+scale_x_discrete(limits=c("NF52","NF56","NF60","NF63","NF66"))+facet_grid(~Population)+xlab("Developmental stage")+labs(y="Shannon diversity",x="")


# Simpson diversity index
comm.D<-diversity(comm, index="simpson") 
dfD<-data.frame(D=comm.D,DevelopmentalStage=metadata$DevelopmentalStage,Population=metadata$Population)
plotD<-ggplot(dfD,aes(x=DevelopmentalStage,y=D,fill=Population))+geom_bar(position="dodge",stat="identity")+scale_fill_grey()+scale_x_discrete(limits=c("NF52","NF56","NF60","NF63","NF66"))+facet_grid(~Population)+xlab("Developmental stage")+labs(y="Simpson diversity",x="")

# Pielou equitability index
comm.J <- comm.H/log(specnumber(comm), base=exp(1))
dfJ<-data.frame(J= comm.J,DevelopmentalStage=metadata$DevelopmentalStage,Population=metadata$Population)
plotJ<-ggplot(dfJ,aes(x=DevelopmentalStage,y=J,fill=Population))+geom_bar(position="dodge",stat="identity")+scale_fill_grey()+scale_x_discrete(limits=c("NF52","NF56","NF60","NF63","NF66"))+facet_grid(~Population)+xlab("Developmental stage")+labs(y="Pielou evenness",x="")

# Plots of the three alpha-diversity metrics
grid.arrange(plotH, plotD, plotJ,nrow=3,ncol=1)

# Saving plot as a pdf file
#pdf("plotHDJ_Stage.pdf",width=6,height=10)
#print(grid.arrange(plotH, plotD, plotJ,nrow=3,ncol=1))
#dev.off()

# Comparison of dissimilarity measures
rankindex(comm,comm,c("euc","man","bray","jac","kul"))

##       euc       man      bray       jac       kul 
## 0.7003953 0.8367589 0.8367589 0.8367589 0.8367589

# Checking that we have a correct sampling of the diversity in our samples
plot(specaccum(comm), xlab = "# of samples", ylab = "# of species",main="Species accumulation in tadpole's gut microbiomes " )

# Saving plot as a pdf file
#pdf("plot_spec_accum_stage.pdf",width=7,height=5)
#print(plot(specaccum(comm), xlab = "# of samples", ylab = "# of species",main="Species accumulation in tadpole's gut microbiomes " ))
#dev.off()

# Computing dissimilarity using Bray-Curtis distance (weighted)
comm.bc.dist <- vegdist(comm, method = "bray")
# Clustering to analyse samples - Bray
comm.bc.clust <- hclust(comm.bc.dist, method = "average")
plot(comm.bc.clust, ylab = "Bray-Curtis dissimilarity")

# Saving plot as a pdf file
#pdf("plot_bc_clust_early.pdf",width=8,height=6)
#print(plot(comm.bc.clust, ylab = "Bray-Curtis dissimilarity"))
#dev.off()

#Dissimilarity using Sorenson (unweighted)
comm.so.dist<-vegdist(comm,method="bray",binary="TRUE")
# Clustering to analyse samples - Sorenson
comm.so.clust <- hclust(comm.so.dist, method = "average")
plot(comm.so.clust, ylab = "Sorenson dissimilarity")

# Saving plot as a pdf file
#pdf("plot_so_clust_early.pdf",width=8,height=6)
#print(plot(comm.so.clust, ylab = "Sorenson dissimilarity"))
#dev.off()

# Ordination using NMDS
comm.bc.mds <- metaMDS(comm, dist = "bray")

## Run 0 stress 0.0005231172 
## Run 1 stress 0.0004424161 
## ... New best solution
## ... Procrustes: rmse 0.0006074575  max resid 0.00116398 
## ... Similar to previous best
## Run 2 stress 0.0001601801 
## ... New best solution
## ... Procrustes: rmse 0.1131774  max resid 0.2735635 
## Run 3 stress 9.678089e-05 
## ... New best solution
## ... Procrustes: rmse 0.0001981568  max resid 0.0004157821 
## ... Similar to previous best
## Run 4 stress 9.891049e-05 
## ... Procrustes: rmse 0.007381759  max resid 0.01789149 
## Run 5 stress 9.972065e-05 
## ... Procrustes: rmse 0.0001616688  max resid 0.0003259267 
## ... Similar to previous best
## Run 6 stress 0.0001513282 
## ... Procrustes: rmse 0.1113245  max resid 0.2893559 
## Run 7 stress 9.451304e-05 
## ... New best solution
## ... Procrustes: rmse 1.475005e-05  max resid 3.149516e-05 
## ... Similar to previous best
## Run 8 stress 0.000335401 
## ... Procrustes: rmse 0.04320001  max resid 0.1069251 
## Run 9 stress 0.0001574778 
## ... Procrustes: rmse 0.08518418  max resid 0.2172328 
## Run 10 stress 0.0002239629 
## ... Procrustes: rmse 0.08664037  max resid 0.2211307 
## Run 11 stress 0.0006157232 
## Run 12 stress 0.0003270883 
## ... Procrustes: rmse 0.04129314  max resid 0.1020825 
## Run 13 stress 9.984267e-05 
## ... Procrustes: rmse 0.02268405  max resid 0.05549907 
## Run 14 stress 0.0002451062 
## ... Procrustes: rmse 0.07597814  max resid 0.1924212 
## Run 15 stress 0.0001638108 
## ... Procrustes: rmse 0.07991811  max resid 0.2030288 
## Run 16 stress 9.588592e-05 
## ... Procrustes: rmse 1.261972e-05  max resid 2.505573e-05 
## ... Similar to previous best
## Run 17 stress 0.0005470961 
## ... Procrustes: rmse 0.1136224  max resid 0.2951274 
## Run 18 stress 0.0004382263 
## ... Procrustes: rmse 0.06834923  max resid 0.172019 
## Run 19 stress 0.0002115468 
## ... Procrustes: rmse 0.04119632  max resid 0.101906 
## Run 20 stress 9.660443e-05 
## ... Procrustes: rmse 8.502147e-05  max resid 0.0001774282 
## ... Similar to previous best
## *** Solution reached

# We have a stress value of 1e-4, thus the NMDS ordination provides an excellent representation of the distances between the active bacterial communities of the tadpole's gut samples.

# Checking the NMDS
stressplot(comm.bc.mds)

# 
# Saving plot as a pdf file
#pdf("plot_nmds_stressplot.pdf",width=8,height=6)
#print(stressplot(comm.bc.mds))
#dev.off()

mds.fig <- ordiplot(comm.bc.mds, type = "none")
points(mds.fig, "sites", pch = 19, col = "grey")
orditorp(comm.bc.mds, display = "sites")

# Saving plot to a file
#pdf("plot1_nmds.pdf",width=8,height=6)
#print(mds.fig <- ordiplot(comm.bc.mds, type = "none"))
#print(points(mds.fig, "sites", pch = 19, col = "grey"))
#print(orditorp(comm.bc.mds, display = "sites"))
#dev.off()

# Plotting NMDS ordination using Bray-Curtis distance
#par(new=TRUE)
mds.fig2 <- ordiplot(comm.bc.mds, type = "none")
points(mds.fig2, "species", pch = 1,cex=0.5, col = "grey")
ordispider(comm.bc.mds,groups=metadata$DevelopmentalStage,show.groups="NF52",col="green")
points(mds.fig2, "sites", pch = 19, col = "green", select = metadata$DevelopmentalStage ==  "NF52")
ordispider(comm.bc.mds,groups=metadata$DevelopmentalStage,show.groups="NF56",col="black")
points(mds.fig2, "sites", pch = 19, col = "black", select = metadata$DevelopmentalStage ==  "NF56")
ordispider(comm.bc.mds,groups=metadata$DevelopmentalStage,show.groups="NF60",col="orange")
points(mds.fig2, "sites", pch = 19, col = "orange", select = metadata$DevelopmentalStage ==  "NF60")
ordispider(comm.bc.mds,groups=metadata$DevelopmentalStage,show.groups="NF63",col="black")
points(mds.fig2, "sites", pch = 19, col = "blue", select = metadata$DevelopmentalStage ==  "NF63")
ordispider(comm.bc.mds,groups=metadata$DevelopmentalStage,show.groups="NF66",col="red")
points(mds.fig2, "sites", pch = 19, col = "red", select = metadata$DevelopmentalStage ==  "NF66")


ordiellipse(comm.bc.mds, metadata$DevelopmentalStage, conf = 0.95, label = FALSE,lty = 'dotted')

################################
#  Ordination beta-dispersion  #
################################

# Plotting the ordination results of beta-dispersion PCoA (PERMDISP)
comm.betadisp=betadisper(comm.bc.dist, metadata$DevelopmentalStage, type="median")
comm.betadisp

## 
##  Homogeneity of multivariate dispersions
## 
## Call: betadisper(d = comm.bc.dist, group = metadata$DevelopmentalStage,
## type = "median")
## 
## No. of Positive Eigenvalues: 9
## No. of Negative Eigenvalues: 0
## 
## Average distance to median:
##    NF52    NF56    NF60    NF63    NF66 
## 0.03869 0.07199 0.10865 0.11824 0.04825 
## 
## Eigenvalues for PCoA axes:
## (Showing 8 of 9 eigenvalues)
##    PCoA1    PCoA2    PCoA3    PCoA4    PCoA5    PCoA6    PCoA7    PCoA8 
## 0.049240 0.030483 0.014086 0.009005 0.003876 0.002191 0.002047 0.001274

mds.fig3a<-plot(comm.betadisp)

#Saving plot to a file
#pdf("plot1_betadispa.pdf",width=8,height=6)
#print(mds.fig3a<-plot(comm.betadisp))
#dev.off()

# Significance
comm.betadisp.perm=permutest(comm.betadisp, group= metadata$DevelopmentalStage, type="median", permutations=999, pairwise=TRUE)
comm.betadisp.perm

## 
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
## 
## Response: Distances
##           Df   Sum Sq   Mean Sq          F N.Perm Pr(>F)    
## Groups     4 0.010044 0.0025111 2.3018e+29    999  0.001 ***
## Residuals  5 0.000000 0.0000000                             
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Pairwise comparisons:
## (Observed p-value below diagonal, permuted p-value above diagonal)
##            NF52       NF56       NF60       NF63  NF66
## NF52            8.0000e-03 5.0000e-03 4.0000e-03 0.011
## NF56 1.9580e-29            7.0000e-03 1.5000e-02 0.016
## NF60 2.4591e-31 1.6407e-29            1.1000e-02 0.005
## NF63 1.7878e-30 1.5034e-29 1.2665e-28            0.009
## NF66 5.2662e-30 3.7844e-29 2.2435e-31 2.2310e-30

#######################################################
#  Analysis of community structure using MDP and MNTD #
#######################################################


#Phylogenetic distance to measure the community phylogenetic structure  without abundance
phy.dist <- cophenetic(phy)
comm.sesmpd <- ses.mpd(comm, phy.dist, null.model = "richness", abundance.weighted = FALSE,  runs = 9)
comm.sesmntd <- ses.mntd(comm, phy.dist, null.model = "richness", abundance.weighted = FALSE,  runs = 9)

#Inspecting obtained values
#comm.sesmpd
#comm.sesmntd

#Figure with comparisons SES(MPD) and SES(MNTD)
df3<-data.frame(SES_MPD = comm.sesmpd$mpd.obs.z, DevelopmentalStage = metadata$DevelopmentalStage,Population=metadata$Population)
plot3<-ggplot(df3,aes(x=DevelopmentalStage,y=SES_MPD,color=Population))+geom_jitter(width=0.1,height=0.01,size=5,alpha=0.9)+scale_x_discrete(limits=c("NF52","NF56","NF60","NF63","NF66"))+theme_nb()+labs(x="",y="SES MPD")+guides(color=FALSE)
df4<-data.frame(SES_MNTD = comm.sesmntd$mntd.obs.z, DevelopmentalStage = metadata$DevelopmentalStage,Population=metadata$Population)
plot4<-ggplot(df4,aes(x=DevelopmentalStage,y=SES_MNTD,color=Population))+geom_jitter(width=0.1,height=0.01,size=5,alpha=0.9)+scale_x_discrete(limits=c("NF52","NF56","NF60","NF63","NF66"))+theme_nb()+labs(x="",y="SES MNTD")+guides(color=FALSE)

grid.arrange(plot3, plot4, ncol=2)

# Saving plot to a file
#pdf("plot34_sesmpdmntd.pdf",width=5,height=4)
#print(grid.arrange(plot3, plot4, ncol=2))
#dev.off()


#Significance
#Using Kruskal-Wallis test
kw_sesmpd<-kruskal.test(comm.sesmpd$mpd.obs.z ~ metadata$DevelopmentalStage, data= comm)
kw_sesmntd<-kruskal.test(comm.sesmntd$mntd.obs.z ~ metadata$DevelopmentalStage, data= comm)

set.seed(623)
ggstatsplot::ggbetweenstats(data=df3,x=DevelopmentalStage,y=SES_MPD,mean.plotting = FALSE,type="np",plot.type = "box")

## Note: 95% CI for effect size estimate was computed with 100 bootstrap samples.

##

## Note: Shapiro-Wilk Normality Test for SES_MPD: p-value = 0.206

##

## Note: Bartlett's test for homogeneity of variances for factor DevelopmentalStage: p-value = 0.494

##

ggsave("plot_SESMPD_stat.pdf",width=5,height=4)
ggstatsplot::ggbetweenstats(data=df4,x=DevelopmentalStage,y=SES_MNTD,mean.plotting = FALSE,type="np",plot.type = "box")

## Note: 95% CI for effect size estimate was computed with 100 bootstrap samples.

##

## Note: Shapiro-Wilk Normality Test for SES_MNTD: p-value = 0.726

##

## Note: Bartlett's test for homogeneity of variances for factor DevelopmentalStage: p-value = 0.267

##

ggsave("plot_SESMNTD_stat.pdf",width=5,height=4)


#####################################
# Phylogenetic Beta-diversity       #
#####################################
# beta diversity: Taxonomic (Bray-Curtis) dissimilarity explained (Permanova)
adonis(comm.bc.dist ~ DevelopmentalStage, data = metadata)

## 
## Call:
## adonis(formula = comm.bc.dist ~ DevelopmentalStage, data = metadata) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##                    Df SumsOfSqs  MeanSqs F.Model     R2 Pr(>F)
## DevelopmentalStage  4  0.043397 0.010849 0.77953 0.3841  0.738
## Residuals           5  0.069588 0.013917         0.6159       
## Total               9  0.112984                  1.0000

# beta diversity: compute phylogenetic dissimilarity using MPD
comm.mpd.dist<-comdist(comm,phy.dist,abundance.weighted=TRUE)
adonis(comm.mpd.dist ~ DevelopmentalStage, data = metadata)

## 
## Call:
## adonis(formula = comm.mpd.dist ~ DevelopmentalStage, data = metadata) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##                    Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)
## DevelopmentalStage  4   0.51821 0.12955 0.99965 0.44436  0.481
## Residuals           5   0.64798 0.12960         0.55564       
## Total               9   1.16619                 1.00000

# beta diversity: compute phylogenetic dissimilarity using MNTD
comm.mntd.dist <- comdistnt(comm, phy.dist, abundance.weighted = TRUE)
adonis(comm.mntd.dist ~ DevelopmentalStage, data = metadata)

## 
## Call:
## adonis(formula = comm.mntd.dist ~ DevelopmentalStage, data = metadata) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##                    Df   SumsOfSqs     MeanSqs  F.Model       R2 Pr(>F)
## DevelopmentalStage  4 -4.7584e-06 -1.1896e-06 -0.29432 -0.30797      1
## Residuals           5  2.0209e-05  4.0419e-06           1.30797       
## Total               9  1.5451e-05                       1.00000

# calculate Mantel correlation for taxonomic Bray-Curtis vs. phylogenetic
# MPD diversity
mantel(comm.bc.dist, comm.mpd.dist)

## 
## Mantel statistic based on Pearson's product-moment correlation 
## 
## Call:
## mantel(xdis = comm.bc.dist, ydis = comm.mpd.dist) 
## 
## Mantel statistic r: 0.2764 
##       Significance: 0.155 
## 
## Upper quantiles of permutations (null model):
##   90%   95% 97.5%   99% 
## 0.369 0.437 0.522 0.594 
## Permutation: free
## Number of permutations: 999

#MNTD diversity
mantel(comm.bc.dist, comm.mntd.dist)

## 
## Mantel statistic based on Pearson's product-moment correlation 
## 
## Call:
## mantel(xdis = comm.bc.dist, ydis = comm.mntd.dist) 
## 
## Mantel statistic r: 0.7305 
##       Significance: 0.001 
## 
## Upper quantiles of permutations (null model):
##   90%   95% 97.5%   99% 
## 0.332 0.450 0.528 0.563 
## Permutation: free
## Number of permutations: 999

# The sole metric that argues for a significant difference of beta-diversity is SES(MNTD). Altogether we do not observe significant differences of beta diversity between active gut communities during tadpole growth and metamorphosis. The sole metric that argues for a significant difference is SES(MNTD)

xplate_analysis_nmt3

N. Pollet

07/05/2020

METATETARD

Phylum abundance analysis of OTUs

on 16S rRNA RNA data