# Turning off message and warning for rendering
#Loading packages
library(picante)
library(ggplot2)
library(phytools)
library(dplyr)
library(grid)
library(gridExtra)
library(ggstatsplot)
## Folders, Themes, colors
source("prelude.R")
# Setting up directories
data_dir <-paste0(data_dir_path,"xplate")
output_dir <- paste0(output_dir_path,"Figure_late")
# Defining the directory containing the data to import
setwd(data_dir)
### Rarefaction analysis : importing data
comm_abundance_1<-read.table("nmt3_abondance_early_tpn_notax.csv",sep=";",header=TRUE,row.names=1)
# Defining the output dir
setwd(output_dir)
# Getting rid of OTUs with zero abundance
comm_abundance_1<-comm_abundance_1 %>% select_if((function(col) is.numeric(col) && sum(col) > 0))
S<-specnumber(comm_abundance_1)
raremax <- min(rowSums(comm_abundance_1))
Srare <- rarefy(comm_abundance_1, raremax)
#Rarefaction
plot(S, Srare, xlab = "Observed No. of Species", ylab = "Rarefied No. of Species")
abline(0, 1)
# Saving plot as a pdf file
#pdf("plot_rarefaction1_rna.pdf",width=5,height=4)
#print(plot(S, Srare, xlab = "Observed No. of Species", ylab = "Rarefied No. of Species"))
#print(abline(0, 1))
#dev.off()
# Rarefaction curve
rarecurve(comm_abundance_1, step = 20, sample = raremax, col = "blue", cex = 0.6)
# Saving plot as a pdf file
#pdf("plot_rarefaction2_rna.pdf",width=5,height=4)
#print(rarecurve(comm_abundance_1, step = 20, sample = raremax, col = "blue", cex = 0.6))
#dev.off()
###Diversity analysis
# Defining the directory containing the data to import
setwd(data_dir)
# Working on abundance data from 100 rarefactions
allcom<-read.table("xplate_fichier_allcom.txt", header=TRUE, row.names=1)
# Getting rid of OTUs with zero abundance
allcom<-allcom %>% select_if((function(col) is.numeric(col) && sum(col) > 0))
commRS<-decostand(allcom, method="total")
comm<-commRS
#Importing metadata
metadata<-read.csv("nmt3_metadata_late.csv",sep=";",header=TRUE,row.names=1)
metadata$DevelopmentalStage=as.factor(metadata$DevelopmentalStage)
#Importing phylogeny
phy<-read.tree("nmt3_sequences_mpr.tree")
#Rooting the tree using midpoint rooting
phy<-midpoint.root(phy)
#Getting rid of taxa that are missing in the studied samples
phy<-prune.sample(comm, phy)
#Combining the input data
combined <- match.phylo.comm(phy, comm)
phy <- combined$phy
comm <- combined$comm
metadata <- metadata[rownames(comm), ]
#Checking the index
all.equal(rownames(comm), rownames(metadata))
## [1] TRUE
#Checking the normalisation
rowSums(comm)
## SD-SL-iNF52 SD-SL-iNF56 SD-SL-iNF60 SD-SL-iNF63 SD-SL-iNF66 SD-TGA-iNF52
## 1 1 1 1 1 1
## SD-TGA-iNF56 SD-TGA-iNF60 SD-TGA-iNF63 SD-TGA-iNF66
## 1 1 1 1
#######################################
# Analysis of taxonomic diversity #
#######################################
# Defining the output dir
setwd(output_dir)
# For the boxplot with ggplot2
df<-data.frame(Nb.OTU= specnumber(comm),DevelopmentalStage=metadata$DevelopmentalStage,Population=metadata$Population)
ggplot(df,aes(x=DevelopmentalStage,y=Nb.OTU))+geom_bar(position="dodge",stat="identity")+theme_npgray()+scale_fill_grey()+scale_x_discrete(limits=c("NF52","NF56","NF60","NF63","NF66"))+xlab("Developmental stage")+facet_grid(~Population)
ggsave("Plot_stage_early_1.pdf",width=10,height=13,units="cm")
#Analysis of phylogenetic diversity
comm.pd <- pd(comm, phy)
# For the boxplot with ggplot2
df2<-data.frame(Faith.PD= comm.pd$PD,DevelopmentalStage=metadata$DevelopmentalStage,Population=metadata$Population)
ggplot(df2,aes(x=DevelopmentalStage,y=Faith.PD))+geom_bar(position="dodge",stat="identity")+theme_npgray()+scale_fill_grey()+scale_x_discrete(limits=c("NF52","NF56","NF60","NF63","NF66"))+xlab("Developmental stage")+facet_grid(~Population)+xlab("Developmental stage")
ggsave("Plot_stage_early_2.pdf",width=10,height=13,units="cm")
# Shannon index
comm.H<-diversity(comm, index="shannon", base=exp(1))
dfH<-data.frame(H=comm.H,DevelopmentalStage=metadata$DevelopmentalStage,Population=metadata$Population)
plotH<-ggplot(dfH,aes(x=DevelopmentalStage,y=H,fill=Population))+geom_bar(position="dodge",stat="identity")+scale_fill_grey()+scale_x_discrete(limits=c("NF52","NF56","NF60","NF63","NF66"))+facet_grid(~Population)+xlab("Developmental stage")+labs(y="Shannon diversity",x="")
# Simpson diversity index
comm.D<-diversity(comm, index="simpson")
dfD<-data.frame(D=comm.D,DevelopmentalStage=metadata$DevelopmentalStage,Population=metadata$Population)
plotD<-ggplot(dfD,aes(x=DevelopmentalStage,y=D,fill=Population))+geom_bar(position="dodge",stat="identity")+scale_fill_grey()+scale_x_discrete(limits=c("NF52","NF56","NF60","NF63","NF66"))+facet_grid(~Population)+xlab("Developmental stage")+labs(y="Simpson diversity",x="")
# Pielou equitability index
comm.J <- comm.H/log(specnumber(comm), base=exp(1))
dfJ<-data.frame(J= comm.J,DevelopmentalStage=metadata$DevelopmentalStage,Population=metadata$Population)
plotJ<-ggplot(dfJ,aes(x=DevelopmentalStage,y=J,fill=Population))+geom_bar(position="dodge",stat="identity")+scale_fill_grey()+scale_x_discrete(limits=c("NF52","NF56","NF60","NF63","NF66"))+facet_grid(~Population)+xlab("Developmental stage")+labs(y="Pielou evenness",x="")
# Plots of the three alpha-diversity metrics
grid.arrange(plotH, plotD, plotJ,nrow=3,ncol=1)
# Saving plot as a pdf file
#pdf("plotHDJ_Stage.pdf",width=6,height=10)
#print(grid.arrange(plotH, plotD, plotJ,nrow=3,ncol=1))
#dev.off()
# Comparison of dissimilarity measures
rankindex(comm,comm,c("euc","man","bray","jac","kul"))
## euc man bray jac kul
## 0.7003953 0.8367589 0.8367589 0.8367589 0.8367589
# Checking that we have a correct sampling of the diversity in our samples
plot(specaccum(comm), xlab = "# of samples", ylab = "# of species",main="Species accumulation in tadpole's gut microbiomes " )
# Saving plot as a pdf file
#pdf("plot_spec_accum_stage.pdf",width=7,height=5)
#print(plot(specaccum(comm), xlab = "# of samples", ylab = "# of species",main="Species accumulation in tadpole's gut microbiomes " ))
#dev.off()
# Computing dissimilarity using Bray-Curtis distance (weighted)
comm.bc.dist <- vegdist(comm, method = "bray")
# Clustering to analyse samples - Bray
comm.bc.clust <- hclust(comm.bc.dist, method = "average")
plot(comm.bc.clust, ylab = "Bray-Curtis dissimilarity")
# Saving plot as a pdf file
#pdf("plot_bc_clust_early.pdf",width=8,height=6)
#print(plot(comm.bc.clust, ylab = "Bray-Curtis dissimilarity"))
#dev.off()
#Dissimilarity using Sorenson (unweighted)
comm.so.dist<-vegdist(comm,method="bray",binary="TRUE")
# Clustering to analyse samples - Sorenson
comm.so.clust <- hclust(comm.so.dist, method = "average")
plot(comm.so.clust, ylab = "Sorenson dissimilarity")
# Saving plot as a pdf file
#pdf("plot_so_clust_early.pdf",width=8,height=6)
#print(plot(comm.so.clust, ylab = "Sorenson dissimilarity"))
#dev.off()
# Ordination using NMDS
comm.bc.mds <- metaMDS(comm, dist = "bray")
## Run 0 stress 0.0005231172
## Run 1 stress 0.0004424161
## ... New best solution
## ... Procrustes: rmse 0.0006074575 max resid 0.00116398
## ... Similar to previous best
## Run 2 stress 0.0001601801
## ... New best solution
## ... Procrustes: rmse 0.1131774 max resid 0.2735635
## Run 3 stress 9.678089e-05
## ... New best solution
## ... Procrustes: rmse 0.0001981568 max resid 0.0004157821
## ... Similar to previous best
## Run 4 stress 9.891049e-05
## ... Procrustes: rmse 0.007381759 max resid 0.01789149
## Run 5 stress 9.972065e-05
## ... Procrustes: rmse 0.0001616688 max resid 0.0003259267
## ... Similar to previous best
## Run 6 stress 0.0001513282
## ... Procrustes: rmse 0.1113245 max resid 0.2893559
## Run 7 stress 9.451304e-05
## ... New best solution
## ... Procrustes: rmse 1.475005e-05 max resid 3.149516e-05
## ... Similar to previous best
## Run 8 stress 0.000335401
## ... Procrustes: rmse 0.04320001 max resid 0.1069251
## Run 9 stress 0.0001574778
## ... Procrustes: rmse 0.08518418 max resid 0.2172328
## Run 10 stress 0.0002239629
## ... Procrustes: rmse 0.08664037 max resid 0.2211307
## Run 11 stress 0.0006157232
## Run 12 stress 0.0003270883
## ... Procrustes: rmse 0.04129314 max resid 0.1020825
## Run 13 stress 9.984267e-05
## ... Procrustes: rmse 0.02268405 max resid 0.05549907
## Run 14 stress 0.0002451062
## ... Procrustes: rmse 0.07597814 max resid 0.1924212
## Run 15 stress 0.0001638108
## ... Procrustes: rmse 0.07991811 max resid 0.2030288
## Run 16 stress 9.588592e-05
## ... Procrustes: rmse 1.261972e-05 max resid 2.505573e-05
## ... Similar to previous best
## Run 17 stress 0.0005470961
## ... Procrustes: rmse 0.1136224 max resid 0.2951274
## Run 18 stress 0.0004382263
## ... Procrustes: rmse 0.06834923 max resid 0.172019
## Run 19 stress 0.0002115468
## ... Procrustes: rmse 0.04119632 max resid 0.101906
## Run 20 stress 9.660443e-05
## ... Procrustes: rmse 8.502147e-05 max resid 0.0001774282
## ... Similar to previous best
## *** Solution reached
# We have a stress value of 1e-4, thus the NMDS ordination provides an excellent representation of the distances between the active bacterial communities of the tadpole's gut samples.
# Checking the NMDS
stressplot(comm.bc.mds)
#
# Saving plot as a pdf file
#pdf("plot_nmds_stressplot.pdf",width=8,height=6)
#print(stressplot(comm.bc.mds))
#dev.off()
mds.fig <- ordiplot(comm.bc.mds, type = "none")
points(mds.fig, "sites", pch = 19, col = "grey")
orditorp(comm.bc.mds, display = "sites")
# Saving plot to a file
#pdf("plot1_nmds.pdf",width=8,height=6)
#print(mds.fig <- ordiplot(comm.bc.mds, type = "none"))
#print(points(mds.fig, "sites", pch = 19, col = "grey"))
#print(orditorp(comm.bc.mds, display = "sites"))
#dev.off()
# Plotting NMDS ordination using Bray-Curtis distance
#par(new=TRUE)
mds.fig2 <- ordiplot(comm.bc.mds, type = "none")
points(mds.fig2, "species", pch = 1,cex=0.5, col = "grey")
ordispider(comm.bc.mds,groups=metadata$DevelopmentalStage,show.groups="NF52",col="green")
points(mds.fig2, "sites", pch = 19, col = "green", select = metadata$DevelopmentalStage == "NF52")
ordispider(comm.bc.mds,groups=metadata$DevelopmentalStage,show.groups="NF56",col="black")
points(mds.fig2, "sites", pch = 19, col = "black", select = metadata$DevelopmentalStage == "NF56")
ordispider(comm.bc.mds,groups=metadata$DevelopmentalStage,show.groups="NF60",col="orange")
points(mds.fig2, "sites", pch = 19, col = "orange", select = metadata$DevelopmentalStage == "NF60")
ordispider(comm.bc.mds,groups=metadata$DevelopmentalStage,show.groups="NF63",col="black")
points(mds.fig2, "sites", pch = 19, col = "blue", select = metadata$DevelopmentalStage == "NF63")
ordispider(comm.bc.mds,groups=metadata$DevelopmentalStage,show.groups="NF66",col="red")
points(mds.fig2, "sites", pch = 19, col = "red", select = metadata$DevelopmentalStage == "NF66")
ordiellipse(comm.bc.mds, metadata$DevelopmentalStage, conf = 0.95, label = FALSE,lty = 'dotted')
################################
# Ordination beta-dispersion #
################################
# Plotting the ordination results of beta-dispersion PCoA (PERMDISP)
comm.betadisp=betadisper(comm.bc.dist, metadata$DevelopmentalStage, type="median")
comm.betadisp
##
## Homogeneity of multivariate dispersions
##
## Call: betadisper(d = comm.bc.dist, group = metadata$DevelopmentalStage,
## type = "median")
##
## No. of Positive Eigenvalues: 9
## No. of Negative Eigenvalues: 0
##
## Average distance to median:
## NF52 NF56 NF60 NF63 NF66
## 0.03869 0.07199 0.10865 0.11824 0.04825
##
## Eigenvalues for PCoA axes:
## (Showing 8 of 9 eigenvalues)
## PCoA1 PCoA2 PCoA3 PCoA4 PCoA5 PCoA6 PCoA7 PCoA8
## 0.049240 0.030483 0.014086 0.009005 0.003876 0.002191 0.002047 0.001274
mds.fig3a<-plot(comm.betadisp)
#Saving plot to a file
#pdf("plot1_betadispa.pdf",width=8,height=6)
#print(mds.fig3a<-plot(comm.betadisp))
#dev.off()
# Significance
comm.betadisp.perm=permutest(comm.betadisp, group= metadata$DevelopmentalStage, type="median", permutations=999, pairwise=TRUE)
comm.betadisp.perm
##
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
##
## Response: Distances
## Df Sum Sq Mean Sq F N.Perm Pr(>F)
## Groups 4 0.010044 0.0025111 2.3018e+29 999 0.001 ***
## Residuals 5 0.000000 0.0000000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Pairwise comparisons:
## (Observed p-value below diagonal, permuted p-value above diagonal)
## NF52 NF56 NF60 NF63 NF66
## NF52 8.0000e-03 5.0000e-03 4.0000e-03 0.011
## NF56 1.9580e-29 7.0000e-03 1.5000e-02 0.016
## NF60 2.4591e-31 1.6407e-29 1.1000e-02 0.005
## NF63 1.7878e-30 1.5034e-29 1.2665e-28 0.009
## NF66 5.2662e-30 3.7844e-29 2.2435e-31 2.2310e-30
#######################################################
# Analysis of community structure using MDP and MNTD #
#######################################################
#Phylogenetic distance to measure the community phylogenetic structure without abundance
phy.dist <- cophenetic(phy)
comm.sesmpd <- ses.mpd(comm, phy.dist, null.model = "richness", abundance.weighted = FALSE, runs = 9)
comm.sesmntd <- ses.mntd(comm, phy.dist, null.model = "richness", abundance.weighted = FALSE, runs = 9)
#Inspecting obtained values
#comm.sesmpd
#comm.sesmntd
#Figure with comparisons SES(MPD) and SES(MNTD)
df3<-data.frame(SES_MPD = comm.sesmpd$mpd.obs.z, DevelopmentalStage = metadata$DevelopmentalStage,Population=metadata$Population)
plot3<-ggplot(df3,aes(x=DevelopmentalStage,y=SES_MPD,color=Population))+geom_jitter(width=0.1,height=0.01,size=5,alpha=0.9)+scale_x_discrete(limits=c("NF52","NF56","NF60","NF63","NF66"))+theme_nb()+labs(x="",y="SES MPD")+guides(color=FALSE)
df4<-data.frame(SES_MNTD = comm.sesmntd$mntd.obs.z, DevelopmentalStage = metadata$DevelopmentalStage,Population=metadata$Population)
plot4<-ggplot(df4,aes(x=DevelopmentalStage,y=SES_MNTD,color=Population))+geom_jitter(width=0.1,height=0.01,size=5,alpha=0.9)+scale_x_discrete(limits=c("NF52","NF56","NF60","NF63","NF66"))+theme_nb()+labs(x="",y="SES MNTD")+guides(color=FALSE)
grid.arrange(plot3, plot4, ncol=2)
# Saving plot to a file
#pdf("plot34_sesmpdmntd.pdf",width=5,height=4)
#print(grid.arrange(plot3, plot4, ncol=2))
#dev.off()
#Significance
#Using Kruskal-Wallis test
kw_sesmpd<-kruskal.test(comm.sesmpd$mpd.obs.z ~ metadata$DevelopmentalStage, data= comm)
kw_sesmntd<-kruskal.test(comm.sesmntd$mntd.obs.z ~ metadata$DevelopmentalStage, data= comm)
set.seed(623)
ggstatsplot::ggbetweenstats(data=df3,x=DevelopmentalStage,y=SES_MPD,mean.plotting = FALSE,type="np",plot.type = "box")
## Note: 95% CI for effect size estimate was computed with 100 bootstrap samples.
##
## Note: Shapiro-Wilk Normality Test for SES_MPD: p-value = 0.206
##
## Note: Bartlett's test for homogeneity of variances for factor DevelopmentalStage: p-value = 0.494
##
ggsave("plot_SESMPD_stat.pdf",width=5,height=4)
ggstatsplot::ggbetweenstats(data=df4,x=DevelopmentalStage,y=SES_MNTD,mean.plotting = FALSE,type="np",plot.type = "box")
## Note: 95% CI for effect size estimate was computed with 100 bootstrap samples.
##
## Note: Shapiro-Wilk Normality Test for SES_MNTD: p-value = 0.726
##
## Note: Bartlett's test for homogeneity of variances for factor DevelopmentalStage: p-value = 0.267
##
ggsave("plot_SESMNTD_stat.pdf",width=5,height=4)
#####################################
# Phylogenetic Beta-diversity #
#####################################
# beta diversity: Taxonomic (Bray-Curtis) dissimilarity explained (Permanova)
adonis(comm.bc.dist ~ DevelopmentalStage, data = metadata)
##
## Call:
## adonis(formula = comm.bc.dist ~ DevelopmentalStage, data = metadata)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## DevelopmentalStage 4 0.043397 0.010849 0.77953 0.3841 0.738
## Residuals 5 0.069588 0.013917 0.6159
## Total 9 0.112984 1.0000
# beta diversity: compute phylogenetic dissimilarity using MPD
comm.mpd.dist<-comdist(comm,phy.dist,abundance.weighted=TRUE)
adonis(comm.mpd.dist ~ DevelopmentalStage, data = metadata)
##
## Call:
## adonis(formula = comm.mpd.dist ~ DevelopmentalStage, data = metadata)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## DevelopmentalStage 4 0.51821 0.12955 0.99965 0.44436 0.481
## Residuals 5 0.64798 0.12960 0.55564
## Total 9 1.16619 1.00000
# beta diversity: compute phylogenetic dissimilarity using MNTD
comm.mntd.dist <- comdistnt(comm, phy.dist, abundance.weighted = TRUE)
adonis(comm.mntd.dist ~ DevelopmentalStage, data = metadata)
##
## Call:
## adonis(formula = comm.mntd.dist ~ DevelopmentalStage, data = metadata)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## DevelopmentalStage 4 -4.7584e-06 -1.1896e-06 -0.29432 -0.30797 1
## Residuals 5 2.0209e-05 4.0419e-06 1.30797
## Total 9 1.5451e-05 1.00000
# calculate Mantel correlation for taxonomic Bray-Curtis vs. phylogenetic
# MPD diversity
mantel(comm.bc.dist, comm.mpd.dist)
##
## Mantel statistic based on Pearson's product-moment correlation
##
## Call:
## mantel(xdis = comm.bc.dist, ydis = comm.mpd.dist)
##
## Mantel statistic r: 0.2764
## Significance: 0.155
##
## Upper quantiles of permutations (null model):
## 90% 95% 97.5% 99%
## 0.369 0.437 0.522 0.594
## Permutation: free
## Number of permutations: 999
#MNTD diversity
mantel(comm.bc.dist, comm.mntd.dist)
##
## Mantel statistic based on Pearson's product-moment correlation
##
## Call:
## mantel(xdis = comm.bc.dist, ydis = comm.mntd.dist)
##
## Mantel statistic r: 0.7305
## Significance: 0.001
##
## Upper quantiles of permutations (null model):
## 90% 95% 97.5% 99%
## 0.332 0.450 0.528 0.563
## Permutation: free
## Number of permutations: 999
# The sole metric that argues for a significant difference of beta-diversity is SES(MNTD). Altogether we do not observe significant differences of beta diversity between active gut communities during tadpole growth and metamorphosis. The sole metric that argues for a significant difference is SES(MNTD)