#Comparaisons early tadpoles
#Chargement des packages
library(picante)
library(ggplot2)
library(phytools)
library(dplyr)
library(grid)
library(gridExtra)
library(ggstatsplot)
## Folders, Themes, colors
source("prelude.R")
# Setting up directories
data_dir <-paste0(data_dir_path,"xpearly")
output_dir <- paste0(output_dir_path,"Supp_Figure_5")
###Rarefaction analysis
# Defining the input directory
setwd(data_dir)
comm_abundance_1<-read.table("nmt3_abondance_early_tpn_notax.csv",sep=";",header=TRUE,row.names=1)
# Getting rid of OTUs with zero abundance
# Rarefaction
comm_abundance_1<-comm_abundance_1 %>% select_if((function(col) is.numeric(col) && sum(col) > 0))
S<-specnumber(comm_abundance_1)
raremax <- min(rowSums(comm_abundance_1))
Srare <- rarefy(comm_abundance_1, raremax)
#Defining the output directory
setwd(output_dir)
# Rarefaction
plot(S, Srare, xlab = "Observed No. of Species", ylab = "Rarefied No. of Species")
print(abline(0, 1))
## NULL
# Rarefaction curve
rarecurve(comm_abundance_1, step = 20, sample = raremax, col = "blue", cex = 0.6)
#Saving plots to files
#pdf("plot_rarefaction1_xpearly.pdf",width=5,height=4)
#print(plot(S, Srare, xlab = "Observed No. of Species", ylab = "Rarefied No. of Species"))
#print(abline(0, 1))
#dev.off()
#pdf("plot_rarefaction2_xpearly.pdf",width=5,height=4)
#print(rarecurve(comm_abundance_1, step = 20, sample = raremax, col = "blue", cex = 0.6))
#dev.off()
### Diversity analysis
# Defining the directory containing the data to import
setwd(data_dir)
# Working on abundance data from 100 rarefactions
allcom<-read.table("xpearly_fichier_allcom.txt", header=TRUE, row.names=1)
# Getting rid of OTUs with zero abundance
allcom<-allcom %>% select_if((function(col) is.numeric(col) && sum(col) > 0))
commRS<-decostand(allcom, method="total")
comm<-commRS
# Importing metadata
metadata<-read.csv("nmt3_metadata_tearlydev.csv",sep=";",header=TRUE,row.names=1)
# Importing phylogeny
phy<-read.tree("nmt3_sequences_mpr.tree")
# Rooting the tree using midpoint rooting
phy<-midpoint.root(phy)
# Getting rid of taxa that are missing in the studied samples
phy<-prune.sample(comm, phy)
# Combining the input data
combined <- match.phylo.comm(phy, comm)
phy <- combined$phy
comm <- combined$comm
metadata <- metadata[rownames(comm), ]
# Checking the index
all.equal(rownames(comm), rownames(metadata))
## [1] TRUE
# Checking the normalisation
rowSums(comm)
## SD-SL-NF41 SD-SL-NF48 SD-SL-NF50 SD-SL-NF52 SD-SL-PE SD-TGA-NF41
## 1 1 1 1 1 1
## SD-TGA-NF48 SD-TGA-NF50 SD-TGA-NF52 SD-TGA-PE
## 1 1 1 1
#######################################
# Analysis of taxonomic diversity #
#######################################
# Defining the output dir
setwd(output_dir)
# For the boxplot with ggplot2
# OTU richness
df<-data.frame(Nb.OTU= specnumber(comm),Sample=metadata$Class,Population=metadata$Population)
ggplot(df,aes(x=Sample,y=Nb.OTU,fill=Population))+geom_bar(position="dodge",stat="identity")+theme_npgray()+scale_fill_grey()+scale_x_discrete(limits=c("Embryo NF30","Tadpole NF41","Tadpole NF48","Tadpole NF50","Tadpole NF52"),labels=c("NF30","NF41","NF48","NF50","NF52"))+xlab("Developmental stage")
ggsave("Plot_stage_early_1.pdf",width=10,height=13,units="cm")
# OTU richness Boxplot with significance
set.seed(623)
ggstatsplot::ggbetweenstats(data=df,x=Sample,y=Nb.OTU,type="np")
## Note: 95% CI for effect size estimate was computed with 100 bootstrap samples.
##
## Note: Shapiro-Wilk Normality Test for Nb.OTU: p-value = 0.375
##
## Note: Bartlett's test for homogeneity of variances for factor Sample: p-value = 0.283
##
ggsave("Plot_stage_early_1_stats.pdf",width=10,height=13,units="cm")
# There is no significant differences of OTU richness across the developmental stages, likely because the number of sample is only N=2 (though each sample is a pool of five individuals).
# Analysis of phylogenetic diversity
comm.pd <- pd(comm, phy)
# For the boxplot with ggplot2
df2<-data.frame(Faith.PD= comm.pd$PD,Sample=metadata$Class,Population=metadata$Population)
# Faith's PD boxplot
ggplot(df2,aes(x=Sample,y=Faith.PD,fill=Population))+geom_bar(position="dodge",stat="identity")+theme_npgray()+scale_fill_grey()+scale_x_discrete(limits=c("Embryo NF30","Tadpole NF41","Tadpole NF48","Tadpole NF50","Tadpole NF52"), labels=c("NF30","NF41","NF48","NF50","NF52"))+xlab("Developmental stage")
ggsave("Plot_stage_early_2.pdf",width=10,height=13,units="cm")
# Phylogenetic diversity Boxplot with significance
set.seed(623)
ggstatsplot::ggbetweenstats(data=df2,x=Sample,y=Faith.PD,type="np")
## Note: 95% CI for effect size estimate was computed with 100 bootstrap samples.
##
## Note: Shapiro-Wilk Normality Test for Faith.PD: p-value = 0.815
##
## Note: Bartlett's test for homogeneity of variances for factor Sample: p-value = 0.064
##
ggsave("Plot_stage_early_2_stats.pdf",width=10,height=13,units="cm")
# There is no significant differences of phylogenetic diversity across the developmental stages, likely because the number of sample is only N=2 (though each sample is a pool of five individuals).
# OTU richness and Faith's PD box plots side by side
plot1<-ggplot(df,aes(x=Sample,y=Nb.OTU,fill=Population))+geom_bar(position="dodge",stat="identity")+theme_npgray()+scale_fill_grey()+scale_x_discrete(limits=c("Embryo NF30","Tadpole NF41","Tadpole NF48","Tadpole NF50","Tadpole NF52"), labels=c("NF30","NF41","NF48","NF50","NF52"))+xlab("Developmental stage")+guides(fill=FALSE)
plot2<-ggplot(df2,aes(x=Sample,y=Faith.PD,fill=Population))+geom_bar(position="dodge",stat="identity")+theme_npgray()+scale_fill_grey()+scale_x_discrete(limits=c("Embryo NF30","Tadpole NF41","Tadpole NF48","Tadpole NF50","Tadpole NF52"), labels=c("NF30","NF41","NF48","NF50","NF52"))+xlab("Developmental stage")+guides(fill=FALSE)
# Plots side by side
grid.arrange(plot1, plot2, ncol=2)
#Saving plot to file
#pdf("plot12_stage_early.pdf",width=10,height=8)
#print(grid.arrange(plot1, plot2, ncol=2))
#dev.off()
# Diversity using Shannon index
comm.H<-diversity(comm, index="shannon", base=exp(1))
dfH<-data.frame(H=comm.H,Sample=metadata$Class,Population=metadata$Population)
plotH<-ggplot(dfH,aes(x=Sample,y=H,fill=Population))+geom_bar(position="dodge",stat="identity")+theme_npgray()+scale_fill_grey()+scale_x_discrete(limits=c("Embryo NF30","Tadpole NF41","Tadpole NF48","Tadpole NF50","Tadpole NF52"), labels=c("NF30","NF41","NF48","NF50","NF52"))+xlab("Developmental stage")+labs(y="Shannon diversity",x="")
plotH
# Shannon index boxplot including stats
set.seed(623)
ggstatsplot::ggbetweenstats(data=dfH,x=Sample,y=H,type="np")
## Note: 95% CI for effect size estimate was computed with 100 bootstrap samples.
##
## Note: Shapiro-Wilk Normality Test for H: p-value = 0.752
##
## Note: Bartlett's test for homogeneity of variances for factor Sample: p-value = 0.544
##
ggsave("plot_class_early_H_stats.pdf",width=6,height=6)
# There is no significant differences of Shannon index (alpha diversity) across the developmental stages, likely because the number of sample is only N=2 (though each sample is a pool of five individuals).
# Diversity using Simpson index
comm.D<-diversity(comm, index="simpson")
dfD<-data.frame(D=comm.D,Sample=metadata$Class,Population=metadata$Population)
plotD<-ggplot(dfD,aes(x=Sample,y=D,fill=Population))+geom_bar(position="dodge",stat="identity")+theme_npgray()+scale_fill_grey()+scale_x_discrete(limits=c("Embryo NF30","Tadpole NF41","Tadpole NF48","Tadpole NF50","Tadpole NF52"), labels=c("NF30","NF41","NF48","NF50","NF52"))+xlab("Developmental stage")+labs(y="Simpson diversity",x="")
plotD
# Simpson index boxplot including stats
set.seed(623)
ggstatsplot::ggbetweenstats(data=dfD,x=Sample,y=D,type="np")
## Note: 95% CI for effect size estimate was computed with 100 bootstrap samples.
##
## Note: Shapiro-Wilk Normality Test for D: p-value = 0.820
##
## Note: Bartlett's test for homogeneity of variances for factor Sample: p-value = 0.331
##
ggsave("plot_class_early_D_stats.pdf",width=6,height=6)
# There is no significant differences of Simpson index (alpha diversity) across the developmental stages, likely because the number of sample is only N=2 (though each sample is a pool of five individuals).
# Diversity using Pielou equitability index
comm.J <- comm.H/log(specnumber(comm), base=exp(1))
dfJ<-data.frame(J= comm.J,Sample=metadata$Class,Population=metadata$Population)
plotJ<-ggplot(dfJ,aes(x=Sample,y=J,fill=Population))+geom_bar(position="dodge",stat="identity")+theme_npgray()+scale_fill_grey()+scale_x_discrete(limits=c("Embryo NF30","Tadpole NF41","Tadpole NF48","Tadpole NF50","Tadpole NF52"), labels=c("NF30","NF41","NF48","NF50","NF52"))+xlab("Developmental stage")+labs(y="Pielou evenness",x="")
plotJ
# Pielou equitability index boxplot including stats
set.seed(623)
ggstatsplot::ggbetweenstats(data=dfJ,x=Sample,y=J,type="np")
## Note: 95% CI for effect size estimate was computed with 100 bootstrap samples.
##
## Note: Shapiro-Wilk Normality Test for J: p-value = 0.666
##
## Note: Bartlett's test for homogeneity of variances for factor Sample: p-value = 0.497
##
ggsave("plot_class_early_J_stats.pdf",width=6,height=6)
# There is no significant differences of Pielou equitability (alpha diversity) across the developmental stages, likely because the number of sample is only N=2 (though each sample is a pool of five individuals).
# Boxplots of the three alpha diversity metrics
grid.arrange(plotH, plotD, plotJ,nrow=3,ncol=1)
# Saving plots to files
#pdf("plotHDJ_Stage.pdf",width=6,height=10)
#print(grid.arrange(plotH, plotD, plotJ,nrow=3,ncol=1))
#dev.off()
#Comparison of dissimilarity measures
rankindex(comm,comm,c("euc","man","bray","jac","kul"))
## euc man bray jac kul
## 0.1760211 0.4160738 0.4160738 0.4160738 0.4160738
# Checking that we have a correct sampling of the diversity in our samples
plot(specaccum(comm), xlab = "# of samples", ylab = "# of species",main="Species accumulation in early tadpole's gut active microbiomes" )
# Saving plot to file
#pdf("plot_spec_accum_stage.pdf",width=7,height=5)
#print(plot(specaccum(comm), xlab = "# of samples", ylab = "# of species",main="Species accumulation in early tadpole's gut active microbiomes" ))
#dev.off()
# Computing dissimilarity using Bray-Curtis distance (weighted)
comm.bc.dist <- vegdist(comm, method = "bray")
# Clustering to analyse samples - Bray
comm.bc.clust <- hclust(comm.bc.dist, method = "average")
# Saving plot to file
#pdf("plot_bc_clust_early.pdf",width=8,height=6)
#print(plot(comm.bc.clust, ylab = "Bray-Curtis dissimilarity"))
#dev.off()
#Dissimilarity using Sorenson (unweighted)
comm.so.dist<-vegdist(comm,method="bray",binary="TRUE")
# Clustering to analyse samples - Sorenson
comm.so.clust <- hclust(comm.so.dist, method = "average")
# Saving plot to file
#pdf("plot_so_clust_early.pdf",width=8,height=6)
#print(plot(comm.so.clust, ylab = "Sorenson dissimilarity"))
#dev.off()
################################
# Ordination NMDS Bray-Curtis #
################################
# Ordination using NMDS
comm.bc.mds <- metaMDS(comm, dist = "bray")
## Run 0 stress 0.1158437
## Run 1 stress 0.1766634
## Run 2 stress 0.1158437
## ... Procrustes: rmse 6.15084e-05 max resid 0.0001102543
## ... Similar to previous best
## Run 3 stress 0.1158437
## ... New best solution
## ... Procrustes: rmse 3.18911e-05 max resid 5.638373e-05
## ... Similar to previous best
## Run 4 stress 0.129208
## Run 5 stress 0.123035
## Run 6 stress 0.1158437
## ... Procrustes: rmse 1.02154e-05 max resid 1.774324e-05
## ... Similar to previous best
## Run 7 stress 0.1158437
## ... Procrustes: rmse 3.718608e-05 max resid 7.828764e-05
## ... Similar to previous best
## Run 8 stress 0.1290646
## Run 9 stress 0.1468006
## Run 10 stress 0.1730187
## Run 11 stress 0.1230349
## Run 12 stress 0.129069
## Run 13 stress 0.1230349
## Run 14 stress 0.1292063
## Run 15 stress 0.1467997
## Run 16 stress 0.1158438
## ... Procrustes: rmse 0.0001308127 max resid 0.0002372645
## ... Similar to previous best
## Run 17 stress 0.1292068
## Run 18 stress 0.1158437
## ... Procrustes: rmse 5.941338e-05 max resid 0.0001120854
## ... Similar to previous best
## Run 19 stress 0.1485038
## Run 20 stress 0.1783666
## *** Solution reached
# We have a stress value of 0.18, thus the NMDS ordination provides a correct representation of the distances between the different developmental stage samples.
# Checking the NMDS
stressplot(comm.bc.mds)
# Saving plot to file
#pdf("plot_nmds_stressplot.pdf",width=8,height=6)
#print(stressplot(comm.bc.mds))
#dev.off()
# Plotting NMDS ordination using Bray-Curtis distance
mds.fig <- ordiplot(comm.bc.mds, type = "none")
points(mds.fig, "sites", pch = 19, col = "grey")
orditorp(comm.bc.mds, display = "sites")
# The first dimension seems to correspond to the progression across developmental stages. The stage NF52 stands out of this trend.
# Saving plot to file
#pdf("plot1_nmds.pdf",width=8,height=6)
#print(mds.fig <- ordiplot(comm.bc.mds, type = "none"))
#print(points(mds.fig, "sites", pch = 19, col = "grey"))
#print(orditorp(comm.bc.mds, display = "sites"))
#dev.off()
################################
# Ordination beta-dispersion #
################################
# Plotting the ordination results of beta-dispersion PCoA (PERMDISP)
comm.betadisp=betadisper(comm.bc.dist, metadata$Class, type="median")
plot(comm.betadisp)
# Like for the NMDS, the first dimension seems to correspond to the progression across developmental stages with stage NF52 standing out of this trend.
# Saving plot to file
#pdf("plot1_betadispa.pdf",width=8,height=6)
#print(mds.fig3a<-plot(comm.betadisp))
#dev.off()
# Significance
comm.betadisp.perm=permutest(comm.betadisp, group= metadata$Class, type="median", permutations=999, pairwise=TRUE)
comm.betadisp.perm
##
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
##
## Response: Distances
## Df Sum Sq Mean Sq F N.Perm Pr(>F)
## Groups 4 0.040438 0.01011 5.4191e+29 999 0.001 ***
## Residuals 5 0.000000 0.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Pairwise comparisons:
## (Observed p-value below diagonal, permuted p-value above diagonal)
## Embryo NF30 Tadpole NF41 Tadpole NF48 Tadpole NF50 Tadpole NF52
## Embryo NF30 4.0000e-03 1.2000e-02 6.0000e-03 0.015
## Tadpole NF41 4.9460e-31 1.3000e-02 6.0000e-03 0.004
## Tadpole NF48 1.9542e-30 5.9413e-30 1.4000e-02 0.011
## Tadpole NF50 1.3237e-30 1.2339e-30 7.8650e-28 0.013
## Tadpole NF52 5.8480e-27 9.7311e-31 2.9691e-30 2.3918e-30
# There is a significant effect of developmental stage on the active gut microbiomes at early stages.
comm.betadisp.HSD<-TukeyHSD(comm.betadisp)
plot(comm.betadisp.HSD)
# Significant differences in communities are between NF30-NF41; NF30-NF50 and NF41-NF50.
#Anova to test the significance of variance homogeneity
anova(comm.betadisp)
## Analysis of Variance Table
##
## Response: Distances
## Df Sum Sq Mean Sq F value Pr(>F)
## Groups 4 0.040438 0.01011 5.4191e+29 < 2.2e-16 ***
## Residuals 5 0.000000 0.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# The anova result is concordant with the permutest, there is a significant effect of developmental stage on the active gut microbiomes at early stages.
#######################################################
# Analysis of community structure using MPD and MNTD #
#######################################################
# Phylogenetic distance to measure the community phylogenetic structure without abundance
phy.dist <- cophenetic(phy)
comm.sesmpd <- ses.mpd(comm, phy.dist, null.model = "richness", abundance.weighted = FALSE, runs = 999)
comm.sesmntd <- ses.mntd(comm, phy.dist, null.model = "richness", abundance.weighted = FALSE, runs = 999)
# Inspecting obtained values
#comm.sesmpd
#comm.sesmntd
#Figure with comparisons SES(MPD) and SES(MNTD)
df3<-data.frame(SES_MPD = comm.sesmpd$mpd.obs.z, Sample = metadata$Class, Population=metadata$Population)
plot3<-ggplot(df3,aes(x=Sample,y=SES_MPD,color=Population))+geom_point()+scale_x_discrete(limits=c("Embryo NF30","Tadpole NF41","Tadpole NF48","Tadpole NF50","Tadpole NF52"), labels=c("NF30","NF41","NF48","NF50","NF52"))+theme_nb()+labs(x="",y="SES MPD")+guides(color=FALSE)
df4<-data.frame(SES_MNTD = comm.sesmntd$mntd.obs.z, Sample = metadata$Class, Population=metadata$Population)
plot4<-ggplot(df4,aes(x=Sample,y=SES_MNTD,color=Population))+geom_point()+scale_x_discrete(limits=c("Embryo NF30","Tadpole NF41","Tadpole NF48","Tadpole NF50","Tadpole NF52"), labels=c("NF30","NF41","NF48","NF50","NF52"))+theme_nb()+labs(x="",y="SES MNTD")+guides(color=FALSE)
grid.arrange(plot3, plot4, ncol=2)
# Saving plot to file
#pdf("plot34_sesmpdmntd.pdf",width=5,height=4)
#print(grid.arrange(plot3, plot4, ncol=2))
#dev.off()
# Significance
set.seed(623)
ggstatsplot::ggbetweenstats(data=df3,x=Sample,y=SES_MPD,type="np")
## Note: 95% CI for effect size estimate was computed with 100 bootstrap samples.
##
## Note: Shapiro-Wilk Normality Test for SES_MPD: p-value = 0.206
##
## Note: Bartlett's test for homogeneity of variances for factor Sample: p-value = 0.121
##
ggsave("plot_SESMPD_stat.pdf",width=5,height=4)
# There is no significant difference of beta-diversity using SES(MPD).
ggstatsplot::ggbetweenstats(data=df4,x=Sample,y=SES_MNTD,type="np")
## Note: 95% CI for effect size estimate was computed with 100 bootstrap samples.
##
## Note: Shapiro-Wilk Normality Test for SES_MNTD: p-value = 0.014
##
## Note: Bartlett's test for homogeneity of variances for factor Sample: p-value = 0.174
##
ggsave("plot_SESMNTD_stat.pdf",width=5,height=4)
# There is no significant difference of beta-diversity using SES(MNTD).
# Beta-diversité phylogénétique
# calculate phylogenetic MNTD beta diversity
comm.mntd.dist <- comdistnt(comm, phy.dist, abundance.weighted = TRUE)
# calculate Mantel correlation for taxonomic Bray-Curtis vs. phylogenetic
# MNTD diversity
mantel(comm.bc.dist, comm.mntd.dist)
##
## Mantel statistic based on Pearson's product-moment correlation
##
## Call:
## mantel(xdis = comm.bc.dist, ydis = comm.mntd.dist)
##
## Mantel statistic r: 0.4947
## Significance: 0.001
##
## Upper quantiles of permutations (null model):
## 90% 95% 97.5% 99%
## 0.242 0.301 0.342 0.378
## Permutation: free
## Number of permutations: 999
# Taxonomic (Bray-Curtis) dissimilarity explained
adonis(comm.bc.dist ~ Class, data = metadata)
##
## Call:
## adonis(formula = comm.bc.dist ~ Class, data = metadata)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Class 4 1.20122 0.3003 2.4199 0.65939 0.014 *
## Residuals 5 0.62048 0.1241 0.34061
## Total 9 1.82170 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# There is a significant effect (P=0.014) of the developmental stage on the taxonomic structure of communities based on Bray-Curtis dissimilarity.
# Phylogenetic dissimilarity explained
adonis(comm.mntd.dist ~ Class, data = metadata)
##
## Call:
## adonis(formula = comm.mntd.dist ~ Class, data = metadata)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Class 4 2.7109e-05 6.7772e-06 9.0696 0.87887 0.001 ***
## Residuals 5 3.7362e-06 7.4720e-07 0.12113
## Total 9 3.0845e-05 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# There is a significant effect (P=0.001) of the developmental stage on the phylogenetic composition of communities based on MNTD dissimilarity.