#Comparaisons early tadpoles

#Chargement des packages
library(picante)
library(ggplot2)
library(phytools)
library(dplyr)
library(grid)
library(gridExtra)
library(ggstatsplot)
## Folders, Themes, colors
source("prelude.R")

# Setting up directories
data_dir <-paste0(data_dir_path,"xpearly")
output_dir <- paste0(output_dir_path,"Supp_Figure_5")

###Rarefaction analysis
# Defining the input directory
setwd(data_dir)
comm_abundance_1<-read.table("nmt3_abondance_early_tpn_notax.csv",sep=";",header=TRUE,row.names=1)
# Getting rid of OTUs with zero abundance
# Rarefaction
comm_abundance_1<-comm_abundance_1 %>% select_if((function(col) is.numeric(col) && sum(col) > 0))
S<-specnumber(comm_abundance_1)
raremax <- min(rowSums(comm_abundance_1))
Srare <- rarefy(comm_abundance_1, raremax)
#Defining the output directory
setwd(output_dir)
# Rarefaction
plot(S, Srare, xlab = "Observed No. of Species", ylab = "Rarefied No. of Species")
print(abline(0, 1))

## NULL
# Rarefaction curve
rarecurve(comm_abundance_1, step = 20, sample = raremax, col = "blue", cex = 0.6)

#Saving plots to files
#pdf("plot_rarefaction1_xpearly.pdf",width=5,height=4)
#print(plot(S, Srare, xlab = "Observed No. of Species", ylab = "Rarefied No. of Species"))
#print(abline(0, 1))
#dev.off()
#pdf("plot_rarefaction2_xpearly.pdf",width=5,height=4)
#print(rarecurve(comm_abundance_1, step = 20, sample = raremax, col = "blue", cex = 0.6))
#dev.off()

### Diversity analysis
# Defining the directory containing the data to import
setwd(data_dir)
# Working on abundance data from 100 rarefactions
allcom<-read.table("xpearly_fichier_allcom.txt", header=TRUE, row.names=1)
# Getting rid of OTUs with zero abundance
allcom<-allcom %>% select_if((function(col) is.numeric(col) && sum(col) > 0))
commRS<-decostand(allcom, method="total")
comm<-commRS

# Importing metadata
metadata<-read.csv("nmt3_metadata_tearlydev.csv",sep=";",header=TRUE,row.names=1)
# Importing phylogeny
phy<-read.tree("nmt3_sequences_mpr.tree")
# Rooting the tree using midpoint rooting
phy<-midpoint.root(phy)
# Getting rid of taxa that are missing in the studied samples
phy<-prune.sample(comm, phy)
# Combining the input data
combined <- match.phylo.comm(phy, comm)
phy <- combined$phy
comm <- combined$comm
metadata <- metadata[rownames(comm), ]
# Checking the index
all.equal(rownames(comm), rownames(metadata))
## [1] TRUE
# Checking the normalisation
rowSums(comm)
##  SD-SL-NF41  SD-SL-NF48  SD-SL-NF50  SD-SL-NF52    SD-SL-PE SD-TGA-NF41 
##           1           1           1           1           1           1 
## SD-TGA-NF48 SD-TGA-NF50 SD-TGA-NF52   SD-TGA-PE 
##           1           1           1           1
#######################################
# Analysis of taxonomic diversity     #
#######################################
# Defining the output dir
setwd(output_dir)

# For the boxplot with ggplot2
# OTU richness
df<-data.frame(Nb.OTU= specnumber(comm),Sample=metadata$Class,Population=metadata$Population)
ggplot(df,aes(x=Sample,y=Nb.OTU,fill=Population))+geom_bar(position="dodge",stat="identity")+theme_npgray()+scale_fill_grey()+scale_x_discrete(limits=c("Embryo NF30","Tadpole NF41","Tadpole NF48","Tadpole NF50","Tadpole NF52"),labels=c("NF30","NF41","NF48","NF50","NF52"))+xlab("Developmental stage")

ggsave("Plot_stage_early_1.pdf",width=10,height=13,units="cm")
# OTU richness Boxplot with significance
set.seed(623)
ggstatsplot::ggbetweenstats(data=df,x=Sample,y=Nb.OTU,type="np")
## Note: 95% CI for effect size estimate was computed with 100 bootstrap samples.
## 
## Note: Shapiro-Wilk Normality Test for Nb.OTU: p-value = 0.375
## 
## Note: Bartlett's test for homogeneity of variances for factor Sample: p-value = 0.283
## 

ggsave("Plot_stage_early_1_stats.pdf",width=10,height=13,units="cm")
# There is no significant differences of OTU richness across the developmental stages, likely because the number of sample is only N=2 (though each sample is a pool of five individuals). 


# Analysis of phylogenetic diversity
comm.pd <- pd(comm, phy)
# For the boxplot with ggplot2
df2<-data.frame(Faith.PD= comm.pd$PD,Sample=metadata$Class,Population=metadata$Population)
# Faith's PD boxplot
ggplot(df2,aes(x=Sample,y=Faith.PD,fill=Population))+geom_bar(position="dodge",stat="identity")+theme_npgray()+scale_fill_grey()+scale_x_discrete(limits=c("Embryo NF30","Tadpole NF41","Tadpole NF48","Tadpole NF50","Tadpole NF52"), labels=c("NF30","NF41","NF48","NF50","NF52"))+xlab("Developmental stage")

ggsave("Plot_stage_early_2.pdf",width=10,height=13,units="cm")
# Phylogenetic diversity Boxplot with significance
set.seed(623)
ggstatsplot::ggbetweenstats(data=df2,x=Sample,y=Faith.PD,type="np")
## Note: 95% CI for effect size estimate was computed with 100 bootstrap samples.
## 
## Note: Shapiro-Wilk Normality Test for Faith.PD: p-value = 0.815
## 
## Note: Bartlett's test for homogeneity of variances for factor Sample: p-value = 0.064
## 

ggsave("Plot_stage_early_2_stats.pdf",width=10,height=13,units="cm")
# There is no significant differences of phylogenetic diversity across the developmental stages, likely because the number of sample is only N=2 (though each sample is a pool of five individuals). 

# OTU richness and Faith's PD box plots side by side
plot1<-ggplot(df,aes(x=Sample,y=Nb.OTU,fill=Population))+geom_bar(position="dodge",stat="identity")+theme_npgray()+scale_fill_grey()+scale_x_discrete(limits=c("Embryo NF30","Tadpole NF41","Tadpole NF48","Tadpole NF50","Tadpole NF52"), labels=c("NF30","NF41","NF48","NF50","NF52"))+xlab("Developmental stage")+guides(fill=FALSE)

plot2<-ggplot(df2,aes(x=Sample,y=Faith.PD,fill=Population))+geom_bar(position="dodge",stat="identity")+theme_npgray()+scale_fill_grey()+scale_x_discrete(limits=c("Embryo NF30","Tadpole NF41","Tadpole NF48","Tadpole NF50","Tadpole NF52"), labels=c("NF30","NF41","NF48","NF50","NF52"))+xlab("Developmental stage")+guides(fill=FALSE)

# Plots side by side 
grid.arrange(plot1, plot2, ncol=2)

#Saving plot to file
#pdf("plot12_stage_early.pdf",width=10,height=8)
#print(grid.arrange(plot1, plot2, ncol=2))
#dev.off()

# Diversity using Shannon index
comm.H<-diversity(comm, index="shannon", base=exp(1)) 
dfH<-data.frame(H=comm.H,Sample=metadata$Class,Population=metadata$Population)
plotH<-ggplot(dfH,aes(x=Sample,y=H,fill=Population))+geom_bar(position="dodge",stat="identity")+theme_npgray()+scale_fill_grey()+scale_x_discrete(limits=c("Embryo NF30","Tadpole NF41","Tadpole NF48","Tadpole NF50","Tadpole NF52"), labels=c("NF30","NF41","NF48","NF50","NF52"))+xlab("Developmental stage")+labs(y="Shannon diversity",x="")
plotH

# Shannon index boxplot including stats
set.seed(623)
ggstatsplot::ggbetweenstats(data=dfH,x=Sample,y=H,type="np")
## Note: 95% CI for effect size estimate was computed with 100 bootstrap samples.
## 
## Note: Shapiro-Wilk Normality Test for H: p-value = 0.752
## 
## Note: Bartlett's test for homogeneity of variances for factor Sample: p-value = 0.544
## 

ggsave("plot_class_early_H_stats.pdf",width=6,height=6)
# There is no significant differences of Shannon index (alpha diversity) across the developmental stages, likely because the number of sample is only N=2 (though each sample is a pool of five individuals). 

# Diversity using Simpson index
comm.D<-diversity(comm, index="simpson")
dfD<-data.frame(D=comm.D,Sample=metadata$Class,Population=metadata$Population)
plotD<-ggplot(dfD,aes(x=Sample,y=D,fill=Population))+geom_bar(position="dodge",stat="identity")+theme_npgray()+scale_fill_grey()+scale_x_discrete(limits=c("Embryo NF30","Tadpole NF41","Tadpole NF48","Tadpole NF50","Tadpole NF52"), labels=c("NF30","NF41","NF48","NF50","NF52"))+xlab("Developmental stage")+labs(y="Simpson diversity",x="")
plotD

# Simpson index boxplot including stats
set.seed(623)
ggstatsplot::ggbetweenstats(data=dfD,x=Sample,y=D,type="np")
## Note: 95% CI for effect size estimate was computed with 100 bootstrap samples.
## 
## Note: Shapiro-Wilk Normality Test for D: p-value = 0.820
## 
## Note: Bartlett's test for homogeneity of variances for factor Sample: p-value = 0.331
## 

ggsave("plot_class_early_D_stats.pdf",width=6,height=6)
# There is no significant differences of Simpson index (alpha diversity) across the developmental stages, likely because the number of sample is only N=2 (though each sample is a pool of five individuals). 

# Diversity using Pielou equitability index
comm.J <- comm.H/log(specnumber(comm), base=exp(1))
dfJ<-data.frame(J= comm.J,Sample=metadata$Class,Population=metadata$Population)
plotJ<-ggplot(dfJ,aes(x=Sample,y=J,fill=Population))+geom_bar(position="dodge",stat="identity")+theme_npgray()+scale_fill_grey()+scale_x_discrete(limits=c("Embryo NF30","Tadpole NF41","Tadpole NF48","Tadpole NF50","Tadpole NF52"), labels=c("NF30","NF41","NF48","NF50","NF52"))+xlab("Developmental stage")+labs(y="Pielou evenness",x="")
plotJ

# Pielou equitability index boxplot including stats
set.seed(623)
ggstatsplot::ggbetweenstats(data=dfJ,x=Sample,y=J,type="np")
## Note: 95% CI for effect size estimate was computed with 100 bootstrap samples.
## 
## Note: Shapiro-Wilk Normality Test for J: p-value = 0.666
## 
## Note: Bartlett's test for homogeneity of variances for factor Sample: p-value = 0.497
## 

ggsave("plot_class_early_J_stats.pdf",width=6,height=6)
# There is no significant differences of Pielou equitability (alpha diversity) across the developmental stages, likely because the number of sample is only N=2 (though each sample is a pool of five individuals). 

# Boxplots of the three alpha diversity metrics
grid.arrange(plotH, plotD, plotJ,nrow=3,ncol=1)

# Saving plots to files
#pdf("plotHDJ_Stage.pdf",width=6,height=10)
#print(grid.arrange(plotH, plotD, plotJ,nrow=3,ncol=1))
#dev.off()

#Comparison of dissimilarity measures
rankindex(comm,comm,c("euc","man","bray","jac","kul"))
##       euc       man      bray       jac       kul 
## 0.1760211 0.4160738 0.4160738 0.4160738 0.4160738
# Checking that we have a correct sampling of the diversity in our samples
plot(specaccum(comm), xlab = "# of samples", ylab = "# of species",main="Species accumulation in early tadpole's gut active microbiomes" )

# Saving plot to file
#pdf("plot_spec_accum_stage.pdf",width=7,height=5)
#print(plot(specaccum(comm), xlab = "# of samples", ylab = "# of species",main="Species accumulation in early tadpole's gut active microbiomes" ))
#dev.off()

# Computing dissimilarity using Bray-Curtis distance (weighted)
comm.bc.dist <- vegdist(comm, method = "bray")
# Clustering to analyse samples - Bray
comm.bc.clust <- hclust(comm.bc.dist, method = "average")
# Saving plot to file
#pdf("plot_bc_clust_early.pdf",width=8,height=6)
#print(plot(comm.bc.clust, ylab = "Bray-Curtis dissimilarity"))
#dev.off()


#Dissimilarity using Sorenson (unweighted)
comm.so.dist<-vegdist(comm,method="bray",binary="TRUE")
# Clustering to analyse samples - Sorenson
comm.so.clust <- hclust(comm.so.dist, method = "average")
# Saving plot to file
#pdf("plot_so_clust_early.pdf",width=8,height=6)
#print(plot(comm.so.clust, ylab = "Sorenson dissimilarity"))
#dev.off()


################################
#  Ordination NMDS Bray-Curtis #
################################


# Ordination using NMDS
comm.bc.mds <- metaMDS(comm, dist = "bray")
## Run 0 stress 0.1158437 
## Run 1 stress 0.1766634 
## Run 2 stress 0.1158437 
## ... Procrustes: rmse 6.15084e-05  max resid 0.0001102543 
## ... Similar to previous best
## Run 3 stress 0.1158437 
## ... New best solution
## ... Procrustes: rmse 3.18911e-05  max resid 5.638373e-05 
## ... Similar to previous best
## Run 4 stress 0.129208 
## Run 5 stress 0.123035 
## Run 6 stress 0.1158437 
## ... Procrustes: rmse 1.02154e-05  max resid 1.774324e-05 
## ... Similar to previous best
## Run 7 stress 0.1158437 
## ... Procrustes: rmse 3.718608e-05  max resid 7.828764e-05 
## ... Similar to previous best
## Run 8 stress 0.1290646 
## Run 9 stress 0.1468006 
## Run 10 stress 0.1730187 
## Run 11 stress 0.1230349 
## Run 12 stress 0.129069 
## Run 13 stress 0.1230349 
## Run 14 stress 0.1292063 
## Run 15 stress 0.1467997 
## Run 16 stress 0.1158438 
## ... Procrustes: rmse 0.0001308127  max resid 0.0002372645 
## ... Similar to previous best
## Run 17 stress 0.1292068 
## Run 18 stress 0.1158437 
## ... Procrustes: rmse 5.941338e-05  max resid 0.0001120854 
## ... Similar to previous best
## Run 19 stress 0.1485038 
## Run 20 stress 0.1783666 
## *** Solution reached
# We have a stress value of 0.18, thus the NMDS ordination provides a correct representation of the distances between the different developmental stage samples.

# Checking the NMDS
stressplot(comm.bc.mds)

# Saving plot to file
#pdf("plot_nmds_stressplot.pdf",width=8,height=6)
#print(stressplot(comm.bc.mds))
#dev.off()

# Plotting NMDS ordination using Bray-Curtis distance
mds.fig <- ordiplot(comm.bc.mds, type = "none")
points(mds.fig, "sites", pch = 19, col = "grey")
orditorp(comm.bc.mds, display = "sites")

# The first dimension seems to correspond to the progression across developmental stages. The stage NF52 stands out of this trend.

# Saving plot to file
#pdf("plot1_nmds.pdf",width=8,height=6)
#print(mds.fig <- ordiplot(comm.bc.mds, type = "none"))
#print(points(mds.fig, "sites", pch = 19, col = "grey"))
#print(orditorp(comm.bc.mds, display = "sites"))
#dev.off()

################################
#  Ordination beta-dispersion  #
################################

# Plotting the ordination results of beta-dispersion PCoA (PERMDISP)
comm.betadisp=betadisper(comm.bc.dist, metadata$Class, type="median")
plot(comm.betadisp)

# Like for the NMDS, the first dimension seems to correspond to the progression across developmental stages with stage NF52 standing out of this trend.

# Saving plot to file
#pdf("plot1_betadispa.pdf",width=8,height=6)
#print(mds.fig3a<-plot(comm.betadisp))
#dev.off()

# Significance
comm.betadisp.perm=permutest(comm.betadisp, group= metadata$Class, type="median", permutations=999, pairwise=TRUE)
comm.betadisp.perm
## 
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
## 
## Response: Distances
##           Df   Sum Sq Mean Sq          F N.Perm Pr(>F)    
## Groups     4 0.040438 0.01011 5.4191e+29    999  0.001 ***
## Residuals  5 0.000000 0.00000                             
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Pairwise comparisons:
## (Observed p-value below diagonal, permuted p-value above diagonal)
##              Embryo NF30 Tadpole NF41 Tadpole NF48 Tadpole NF50 Tadpole NF52
## Embryo NF30                4.0000e-03   1.2000e-02   6.0000e-03        0.015
## Tadpole NF41  4.9460e-31                1.3000e-02   6.0000e-03        0.004
## Tadpole NF48  1.9542e-30   5.9413e-30                1.4000e-02        0.011
## Tadpole NF50  1.3237e-30   1.2339e-30   7.8650e-28                     0.013
## Tadpole NF52  5.8480e-27   9.7311e-31   2.9691e-30   2.3918e-30
# There is a significant effect of developmental stage on the active gut microbiomes at early stages.
comm.betadisp.HSD<-TukeyHSD(comm.betadisp)
plot(comm.betadisp.HSD)

# Significant differences in communities are between NF30-NF41; NF30-NF50 and NF41-NF50.
#Anova to test the significance of variance homogeneity
anova(comm.betadisp)
## Analysis of Variance Table
## 
## Response: Distances
##           Df   Sum Sq Mean Sq    F value    Pr(>F)    
## Groups     4 0.040438 0.01011 5.4191e+29 < 2.2e-16 ***
## Residuals  5 0.000000 0.00000                         
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# The anova result is concordant with the permutest, there is a significant effect of developmental stage on the active gut microbiomes at early stages.

#######################################################
#  Analysis of community structure using MPD and MNTD #
#######################################################

# Phylogenetic distance to measure the community phylogenetic structure  without abundance
phy.dist <- cophenetic(phy)
comm.sesmpd <- ses.mpd(comm, phy.dist, null.model = "richness", abundance.weighted = FALSE,  runs = 999)
comm.sesmntd <- ses.mntd(comm, phy.dist, null.model = "richness", abundance.weighted = FALSE,  runs = 999)

# Inspecting obtained values
#comm.sesmpd
#comm.sesmntd

#Figure with comparisons SES(MPD) and SES(MNTD)
df3<-data.frame(SES_MPD = comm.sesmpd$mpd.obs.z, Sample = metadata$Class, Population=metadata$Population)
plot3<-ggplot(df3,aes(x=Sample,y=SES_MPD,color=Population))+geom_point()+scale_x_discrete(limits=c("Embryo NF30","Tadpole NF41","Tadpole NF48","Tadpole NF50","Tadpole NF52"), labels=c("NF30","NF41","NF48","NF50","NF52"))+theme_nb()+labs(x="",y="SES MPD")+guides(color=FALSE)
df4<-data.frame(SES_MNTD = comm.sesmntd$mntd.obs.z, Sample = metadata$Class, Population=metadata$Population)
plot4<-ggplot(df4,aes(x=Sample,y=SES_MNTD,color=Population))+geom_point()+scale_x_discrete(limits=c("Embryo NF30","Tadpole NF41","Tadpole NF48","Tadpole NF50","Tadpole NF52"), labels=c("NF30","NF41","NF48","NF50","NF52"))+theme_nb()+labs(x="",y="SES MNTD")+guides(color=FALSE)
grid.arrange(plot3, plot4, ncol=2)

# Saving plot to file
#pdf("plot34_sesmpdmntd.pdf",width=5,height=4)
#print(grid.arrange(plot3, plot4, ncol=2))
#dev.off()


# Significance
set.seed(623)
ggstatsplot::ggbetweenstats(data=df3,x=Sample,y=SES_MPD,type="np")
## Note: 95% CI for effect size estimate was computed with 100 bootstrap samples.
## 
## Note: Shapiro-Wilk Normality Test for SES_MPD: p-value = 0.206
## 
## Note: Bartlett's test for homogeneity of variances for factor Sample: p-value = 0.121
## 

ggsave("plot_SESMPD_stat.pdf",width=5,height=4)
# There is no significant difference of beta-diversity using SES(MPD).

ggstatsplot::ggbetweenstats(data=df4,x=Sample,y=SES_MNTD,type="np")
## Note: 95% CI for effect size estimate was computed with 100 bootstrap samples.
## 
## Note: Shapiro-Wilk Normality Test for SES_MNTD: p-value = 0.014
## 
## Note: Bartlett's test for homogeneity of variances for factor Sample: p-value = 0.174
## 

ggsave("plot_SESMNTD_stat.pdf",width=5,height=4)
# There is no significant difference of beta-diversity using SES(MNTD).

# Beta-diversité phylogénétique
# calculate phylogenetic MNTD beta diversity
comm.mntd.dist <- comdistnt(comm, phy.dist, abundance.weighted = TRUE)

# calculate Mantel correlation for taxonomic Bray-Curtis vs. phylogenetic
# MNTD diversity
mantel(comm.bc.dist, comm.mntd.dist)
## 
## Mantel statistic based on Pearson's product-moment correlation 
## 
## Call:
## mantel(xdis = comm.bc.dist, ydis = comm.mntd.dist) 
## 
## Mantel statistic r: 0.4947 
##       Significance: 0.001 
## 
## Upper quantiles of permutations (null model):
##   90%   95% 97.5%   99% 
## 0.242 0.301 0.342 0.378 
## Permutation: free
## Number of permutations: 999
# Taxonomic (Bray-Curtis) dissimilarity explained
adonis(comm.bc.dist ~ Class, data = metadata)
## 
## Call:
## adonis(formula = comm.bc.dist ~ Class, data = metadata) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)  
## Class      4   1.20122  0.3003  2.4199 0.65939  0.014 *
## Residuals  5   0.62048  0.1241         0.34061         
## Total      9   1.82170                 1.00000         
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# There is a significant effect (P=0.014)  of the developmental stage on the taxonomic structure of communities based on Bray-Curtis dissimilarity.

# Phylogenetic dissimilarity explained
adonis(comm.mntd.dist ~ Class, data = metadata)
## 
## Call:
## adonis(formula = comm.mntd.dist ~ Class, data = metadata) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df  SumsOfSqs    MeanSqs F.Model      R2 Pr(>F)    
## Class      4 2.7109e-05 6.7772e-06  9.0696 0.87887  0.001 ***
## Residuals  5 3.7362e-06 7.4720e-07         0.12113           
## Total      9 3.0845e-05                    1.00000           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# There is a significant effect (P=0.001) of the developmental stage on the phylogenetic composition of communities based on MNTD dissimilarity.