# Xenopus microbiome development biodiversity analysis
# Comparaison des communautés entre tetards prométamorphiques, prémétamorphiques, métamorphiques et adultes.
# Chargement des packages
library(phyloseq)
library(phytools)
## Loading required package: ape
## Loading required package: maps
library(tidyverse)
## ── Attaching packages ─────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2 ✓ purrr 0.3.4
## ✓ tibble 3.0.3 ✓ dplyr 1.0.2
## ✓ tidyr 1.1.2 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ── Conflicts ────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## x purrr::map() masks maps::map()
library(ggplot2)
library(ggplotify)
library(tibble)
library(breakaway)
library(DivNet)
library(doParallel)
## Loading required package: foreach
##
## Attaching package: 'foreach'
## The following objects are masked from 'package:purrr':
##
## accumulate, when
## Loading required package: iterators
## Loading required package: parallel
## Folders, Themes, colors
source("prelude.R")
# Setting up directories
data_dir <-paste0(data_dir_path,"xpdev")
output_dir <- paste0(output_dir_path,"Revision")
# Importing data
# Defining the directory containing the data to import
setwd(data_dir)
# Importing abundance data
comm<-read.table("nmt3_abondance_tpn_notax_XP_Dev.asc",header=TRUE,row.names=1)
# Getting rid of OTUs with zero abundance
comm<-comm %>% select_if((function(col) is.numeric(col) && sum(col) > 0))
comm<-t(comm)
#Importing metadata
metadata<-read.table("nmt3_2_metadata_xp_dev.csv",sep=";",header=TRUE,row.names=1)
# Sorting by developmental stage
metadata$LifeStage_sorted<-factor(metadata$LifeStage,levels=c("Premetamorph","Prometamorph","Metamorph","Froglet","Adult"))
taxmat<-read.csv("nmt3_Gal18_abondance_tax.csv",sep=";",header=TRUE,row.names=1)
taxmat<-as.matrix(taxmat)
OTU <- otu_table(comm, taxa_are_rows = TRUE)
TAX <- tax_table(taxmat)
sample_data <- sample_data(metadata)
#Importing phylogeny
phy<-read.tree("nmt3_sequences_mpr.tree")
#Rooting the tree using midpoint rooting
phy<-midpoint.root(phy)
physeq.ini <- merge_phyloseq(OTU,sample_data,TAX,phy)
#Defining the output directory for saving plots
setwd(output_dir)
#Let us look at the observed richness as a function of sequencing depth
#From the divnet tutorial at https://github.com/adw96/stamps2018/blob/master/estimation/diversity-lab.R
observed_c <- sample_richness(physeq.ini)
dev_observed_depth<-data.frame("observed_richness" = (observed_c %>% summary)$estimate,"depth" = phyloseq::sample_sums(physeq.ini),"type" = physeq.ini %>% sample_data %>% get_variable("LifeStage_sorted"))
ggplot(dev_observed_depth,aes(x = depth, y = observed_richness, color = type))+geom_point(size=2,alpha=0.7)+stat_ellipse(type="norm",linetype=2)
#The observed richness does not seem correlated to sequencing depth, especially true regarding the premetamorph samples that were more deeply sequenced
#Nevertheless, we will use breakaway to estimate richness at the species level because it provides a proper measurement of errors
ba_dev_richness <- breakaway(physeq.ini)
#Having a look at the estimated richness
summary_ba_dev_richness<-summary(ba_dev_richness) %>% add_column("SampleNames" = physeq.ini %>% otu_table %>% sample_names)
summary_ba_dev_richness
## # A tibble: 33 x 8
## estimate error lower upper sample_names name model SampleNames
## <dbl> <dbl> <dbl> <dbl> <chr> <chr> <chr> <chr>
## 1 139. 9.70 125. 364. XT1A2G breakaway Negative Bino… XT1A2G
## 2 199. 107. 150. 4985. XT1A4G breakaway Kemp XT1A4G
## 3 149. 8.57 124. 378. XT1A5G breakaway Kemp XT1A5G
## 4 202. 9.96 169. 513. XT1B1G breakaway Kemp XT1B1G
## 5 197. 7.53 178. 373. XT1B2G breakaway Kemp XT1B2G
## 6 150. 4.89 142. 226. XT1B2Gbis breakaway Negative Bino… XT1B2Gbis
## 7 207. 66.1 178. 2505. XT1B4G breakaway Kemp XT1B4G
## 8 203. 36.8 178. 1477. XT1B5G breakaway Kemp XT1B5G
## 9 218. 26.4 162. 1503. XTLS1 breakaway Negative Bino… XTLS1
## 10 190. 16.9 149. 860. XTLS10 breakaway Negative Bino… XTLS10
## # … with 23 more rows
#Plotting the results
# Base plot showing the sample names
plot(ba_dev_richness,physeq=physeq.ini,color="LifeStage_sorted")
# More pretty without sample names, scale of the axis in sqrt to have a higher spread of the values.
p_ba_dev<-as.ggplot(plot(ba_dev_richness,physeq=physeq.ini,color="LifeStage_sorted")+facet_wrap(~color,scales="free_x",nrow=1)+scale_y_sqrt(breaks = c(100,500,1000,5000,10000,20000))+scale_color_manual(values=c("#d73027","#fc8d59","#4d4d4d","#91bfdb","#4575b4"),breaks=c("Premetamorph","Prometamorph","Metamorph","Froglet","Adult"))+theme_npgray()+guides(color=FALSE)+theme(axis.text.x = element_blank()))
p_ba_dev
ggsave(filename = "ba_dev_richness.pdf",width=20,height=15,units="cm")
#Since the depth of sequencing is heterogeneous and sometimes low, the estimates and the confidence intervals vary up to more than 20000 species to less than 500. Heterogeneity within LifeStages is noticeable, in particular the sample XTLS3 (metamorph,depth=23882 obs=165 ba=5712.4 model Negative Binomial),XTLS8 (froglet, depth=23213 obs=226 ba=1295.2 model Negative Binomial) and XTLTW5F(adult, depth=24030 obs=353 ba=1432.2 model Negative Binomial).The depths of sequencing are in the same range as the other samples.
#A prominent issue with the estimation of richness stem from the singletons. The problem is that sequence processing steps tend to filter out singletons. In our project we filtered out clusters with abundances below 0.005% abundance (all samples combined). Thus we can question the validity of the estimations computed by breakaway.
#Testing the influence of singletons with breakaway_no1
ba_dev_nof<- breakaway_nof1(physeq.ini)
plot(ba_dev_nof,physeq=physeq.ini,color="LifeStage_sorted")
p_ba_dev_nof<-as.ggplot(plot(ba_dev_nof,physeq=physeq.ini,color="LifeStage_sorted")+facet_wrap(~color,scales="free_x",nrow=1)+scale_y_sqrt(breaks = c(100,500,1000,5000,10000,20000))+scale_color_manual(values=c("#d73027","#fc8d59","#4d4d4d","#91bfdb","#4575b4"),breaks=c("Premetamorph","Prometamorph","Metamorph","Froglet","Adult"))+theme_npgray()+guides(color=FALSE)+theme(axis.text.x = element_blank()))
p_ba_dev_nof
ggsave(filename = "ba_dev_nof_richness.pdf",width=20,height=15,units="cm")
#We observe that overall the error estimates are lower in the nof estimations. This is less true for the adult samples. Individually some samples have higher error rates in nof, or higher richness (more clear in adult samples). Clearly an effect of singletons for the metamorph sample XTLS3, where the richness is lower in the breakaway_nof1 estimation. Globally, there is an effect of singletons which was somehow expected.
#Testing with chao-bunge metric
chao_bunge_dev<-chao_bunge(physeq.ini)
## Warning in cutoff_wrap(my_data, requested = cutoff): ignoring requested cutoff;
## it's too low
## Warning in cutoff_wrap(my_data, requested = cutoff): ignoring requested cutoff;
## it's too low
## Warning in cutoff_wrap(my_data, requested = cutoff): ignoring requested cutoff;
## it's too low
## Warning in cutoff_wrap(my_data, requested = cutoff): ignoring requested cutoff;
## it's too low
## Warning in cutoff_wrap(my_data, requested = cutoff): ignoring requested cutoff;
## it's too low
plot(chao_bunge_dev,physeq=physeq.ini,color="LifeStage_sorted")
as.ggplot(plot(chao_bunge_dev,physeq=physeq.ini,color="LifeStage_sorted")+facet_wrap(~color,scales="free_x",nrow=1)+scale_y_sqrt(breaks = c(100,500,1000,5000,10000,20000))+scale_color_manual(values=c("#d73027","#fc8d59","#4d4d4d","#91bfdb","#4575b4"),breaks=c("Premetamorph","Prometamorph","Metamorph","Froglet","Adult"))+theme_npgray()+guides(color=FALSE)+theme(axis.text.x = element_blank()))
ggsave(filename = "ba_dev_chaobunge_richness.pdf",width=20,height=15,units="cm")
#The chao-bunge estimates are in line with the breakaway estimate, except that richness estimates are more homogeneous across samples. Errors are globally lower in chao-bunge for premetamorph.
# Testing the hypothesis that the estimated species diversity changes across Life Stages
bt <- betta(summary(ba_dev_richness)$estimate,
summary(ba_dev_richness)$error,
make_design_matrix(physeq.ini, "LifeStage_sorted"))
# Let us have a look at the results
bt$table
## Estimates Standard Errors p-values
## (Intercept) 171.28863 7.795532 0.000
## predictorsPrometamorph 187.06752 43.616307 0.000
## predictorsMetamorph 16.33536 22.464534 0.467
## predictorsFroglet 21.99393 16.274114 0.177
## predictorsAdult 147.97100 17.582418 0.000
# Overall across development , the mean estimated richness in the premetamorphic tadpole gut was 171 species. The diversity in prometamorphic tadpoles was significantly higher with about 187 more species (i.e. twice as much), this was also the case in adults with about 148 more species. On the contrary, metamorph tadpoles and froglets were not more or less diverse at the species level.
# Recipes to turn the results into a tibble for plotting results
table_bt<-as.data.frame(bt$table,row.names=c("Premetamorph","Prometamorph","Metamorph","Froglet","Adult"))
table_bt_tbl<-rownames_to_column(table_bt,var="LifeStage") %>% as_tibble()
#We recompute richness estimates made using Premetamorph LifeStage as intercept in the model
table_bt_tbl<-mutate(table_bt_tbl,Richness=if_else(LifeStage == "Premetamorph",171.28863,171.28863+Estimates))
plot_bt<-ggplot(table_bt_tbl)+geom_point(aes(x=LifeStage,y=Richness,color=LifeStage),size=3,alpha=0.7)+geom_linerange(aes(x=LifeStage,y=Richness,ymin = Richness -`Standard Errors`, ymax = Richness + `Standard Errors`,color=LifeStage))+scale_color_manual(values=c("#d73027","#fc8d59","#4d4d4d","#91bfdb","#4575b4"),breaks=c("Premetamorph","Prometamorph","Metamorph","Froglet","Adult"))+xlab("Life stages")+ylab("OTU richness estimate \n species level")+scale_x_discrete(limits=c("Premetamorph","Prometamorph","Metamorph","Froglet","Adult"),label=c("Premet.","Promet.","Meta.","Froglet","Adult"))+theme_npgray()+guides(color=FALSE)
plot_bt
ggsave(filename = "dev_richness.pdf",width=12,height=9,units="cm")
# Testing the hypothesis that the estimated species diversity changes across Life Stages using Chao-Bunge metric
bt_chao_bunge <- betta(summary(chao_bunge_dev)$estimate,
summary(chao_bunge_dev)$error,
make_design_matrix(physeq.ini, "LifeStage_sorted"))
# Let us have a look at the results
bt_chao_bunge$table
## Estimates Standard Errors p-values
## (Intercept) 171.518204 7.491541 0.000
## predictorsPrometamorph 122.875504 17.632490 0.000
## predictorsMetamorph 5.789171 22.574468 0.798
## predictorsFroglet 77.690047 26.857015 0.004
## predictorsAdult 160.243926 14.338527 0.000
# Overall across development , the mean estimated richness in the premetamorphic tadpole gut was 171 species. The diversity in prometamorphic tadpoles was significantly higher (p=0.000) with about 123 more species, this was also the case in froglets with about 78 more species (p-value=0.004) and with adults with about 160 more species. On the contrary, metamorph tadpoles were not more or less diverse at the species level (p=0.798).
# Now we can use these richness estimates to compute alpha and beta diversity metrics with divnet at the species level and using Life Stages information.
set.seed(20200923)
# Compute alpha and beta diversity metrics at the sample level
# Uncomment to run the following command requiring about thirty minutes of computation using a 4-cores i7 machine.
dv_dev<-divnet(physeq.ini,ncores = 4,tuning = "careful")
##
|
| | 0%
|
|======= | 10%
|
|============== | 20%
|
|===================== | 30%
|
|============================ | 40%
|
|=================================== | 50%
|
|========================================== | 60%
|
|================================================= | 70%
|
|======================================================== | 80%
|
|=============================================================== | 90%
|
|======================================================================| 100%
##
|
| | 0%
|
|======= | 10%
|
|============== | 20%
|
|===================== | 30%
|
|============================ | 40%
|
|=================================== | 50%
|
|========================================== | 60%
|
|================================================= | 70%
|
|======================================================== | 80%
|
|=============================================================== | 90%
|
|======================================================================| 100%
##
|
| | 0%
|
|======= | 10%
|
|============== | 20%
|
|===================== | 30%
|
|============================ | 40%
|
|=================================== | 50%
|
|========================================== | 60%
|
|================================================= | 70%
|
|======================================================== | 80%
|
|=============================================================== | 90%
|
|======================================================================| 100%
##
|
| | 0%
|
|======= | 10%
|
|============== | 20%
|
|===================== | 30%
|
|============================ | 40%
|
|=================================== | 50%
|
|========================================== | 60%
|
|================================================= | 70%
|
|======================================================== | 80%
|
|=============================================================== | 90%
|
|======================================================================| 100%
##
|
| | 0%
|
|======= | 10%
|
|============== | 20%
|
|===================== | 30%
|
|============================ | 40%
|
|=================================== | 50%
|
|========================================== | 60%
|
|================================================= | 70%
|
|======================================================== | 80%
|
|=============================================================== | 90%
|
|======================================================================| 100%
##
|
| | 0%
|
|======= | 10%
|
|============== | 20%
|
|===================== | 30%
|
|============================ | 40%
|
|=================================== | 50%
|
|========================================== | 60%
|
|================================================= | 70%
|
|======================================================== | 80%
|
|=============================================================== | 90%
|
|======================================================================| 100%
# Compute alpha and beta diversity metrics at the Life Stage category level
# Uncomment to run the following command requiring about thirty minutes of computation using a 4-cores i7 machine.
dv_dev_ls<-divnet(physeq.ini,ncores = 4,tuning = "careful",X="LifeStage_sorted")
##
|
| | 0%
|
|======= | 10%
|
|============== | 20%
|
|===================== | 30%
|
|============================ | 40%
|
|=================================== | 50%
|
|========================================== | 60%
|
|================================================= | 70%
|
|======================================================== | 80%
|
|=============================================================== | 90%
|
|======================================================================| 100%
##
|
| | 0%
|
|======= | 10%
|
|============== | 20%
|
|===================== | 30%
|
|============================ | 40%
|
|=================================== | 50%
|
|========================================== | 60%
|
|================================================= | 70%
|
|======================================================== | 80%
|
|=============================================================== | 90%
|
|======================================================================| 100%
##
|
| | 0%
|
|======= | 10%
|
|============== | 20%
|
|===================== | 30%
|
|============================ | 40%
|
|=================================== | 50%
|
|========================================== | 60%
|
|================================================= | 70%
|
|======================================================== | 80%
|
|=============================================================== | 90%
|
|======================================================================| 100%
##
|
| | 0%
|
|======= | 10%
|
|============== | 20%
|
|===================== | 30%
|
|============================ | 40%
|
|=================================== | 50%
|
|========================================== | 60%
|
|================================================= | 70%
|
|======================================================== | 80%
|
|=============================================================== | 90%
|
|======================================================================| 100%
##
|
| | 0%
|
|======= | 10%
|
|============== | 20%
|
|===================== | 30%
|
|============================ | 40%
|
|=================================== | 50%
|
|========================================== | 60%
|
|================================================= | 70%
|
|======================================================== | 80%
|
|=============================================================== | 90%
|
|======================================================================| 100%
##
|
| | 0%
|
|======= | 10%
|
|============== | 20%
|
|===================== | 30%
|
|============================ | 40%
|
|=================================== | 50%
|
|========================================== | 60%
|
|================================================= | 70%
|
|======================================================== | 80%
|
|=============================================================== | 90%
|
|======================================================================| 100%
# Let us look at the estimate using shannon metric compute (alpha diversity) at the samples level
shannon_dv_dev<-summary(dv_dev$shannon) %>% add_column("LifeStage" = physeq.ini %>% sample_data %>% get_variable("LifeStage_sorted")) %>% add_column("Category" = "Samples")
# Let us plot the results of the shannon index at the sample level
ggplot(shannon_dv_dev)+geom_point(aes(x=LifeStage,y=estimate,color=LifeStage),alpha=0.7)+geom_errorbar(aes(x=LifeStage,y=estimate,ymin = lower, ymax = upper,color=LifeStage),width=0.1)+scale_color_manual(values=c("#d73027","#fc8d59","#4d4d4d","#91bfdb","#4575b4"),breaks=c("Premetamorph","Prometamorph","Metamorph","Froglet","Adult"))+xlab("Life stages")+ylab("Shannon diversity estimate")+scale_x_discrete(limits=c("Premetamorph","Prometamorph","Metamorph","Froglet","Adult"),label=c("Premet.","Promet.","Meta.","Froglet","Adult"))+theme_npgray()+guides(color=FALSE)
# The error bars associated with shannon estimates at the sample level are relatively small. Note that the values can differ from samples, e.g. at the metamorph life stage.
# Let us look at the values of the evaluation of the shannon index at the Life Stage level
shannon_dv_dev_ls<-summary(dv_dev_ls$shannon) %>% add_column("LifeStage" = physeq.ini %>% sample_data %>% get_variable("LifeStage_sorted")) %>% add_column("Category" = "Group")
testDiversity(dv_dev_ls,"shannon")
## Hypothesis testing:
## p-value for global test: 0
## Estimates Standard Errors p-values
## (Intercept) 1.10364390 0.05579018 0.000
## predictorsPrometamorph 1.60915397 0.24576862 0.000
## predictorsMetamorph 0.04708822 0.20952270 0.822
## predictorsFroglet 1.97994665 0.09977407 0.000
## predictorsAdult 1.72324056 0.18412408 0.000
# There is a significant difference in alpha diversity between premetamorphic tadpoles and prometamorphic ones (p=0.00), also with Froglets and Adults (p=0) but not with metamorph (p=0.875).
# There is a significant difference in alpha diversity across life stages as measured using the shannon index (p=0)
# Plot the results altogether with the estimate at the sample level.
plot_shannon<-ggplot()+geom_jitter(data=shannon_dv_dev,aes(x=LifeStage,y=estimate,color=LifeStage),width=0.2)+geom_pointrange(data=shannon_dv_dev_ls,aes(x=LifeStage,y=estimate,ymin = lower, ymax = upper,color=LifeStage))+scale_color_manual(values=c("#d73027","#fc8d59","#4d4d4d","#91bfdb","#4575b4"),breaks=c("Premetamorph","Prometamorph","Metamorph","Froglet","Adult"))+xlab("Life stages")+ylab("Shannon diversity estimate")
plot_shannon+scale_x_discrete(limits=c("Premetamorph","Prometamorph","Metamorph","Froglet","Adult"),label=c("Premet.","Promet.","Meta.","Froglet","Adult"))+theme_npgray()+guides(color=FALSE)
ggsave(filename = "dev_shannon.pdf",width=12,height=9,units="cm")
# The same for the Simpson metric
simpson_dv_dev<-summary(dv_dev$simpson) %>% add_column("LifeStage" = physeq.ini %>% sample_data %>% get_variable("LifeStage_sorted")) %>% add_column("Category" = "Samples")
simpson_dv_dev_ls<-summary(dv_dev_ls$simpson) %>% add_column("LifeStage" = physeq.ini %>% sample_data %>% get_variable("LifeStage_sorted")) %>% add_column("Category" = "Group")
testDiversity(dv_dev_ls,"simpson")
## Hypothesis testing:
## p-value for global test: 0
## Estimates Standard Errors p-values
## (Intercept) 0.60861777 0.01214093 0.000
## predictorsPrometamorph -0.43802637 0.06411405 0.000
## predictorsMetamorph -0.02464166 0.08095992 0.761
## predictorsFroglet -0.51186549 0.01444472 0.000
## predictorsAdult -0.36786872 0.03390395 0.000
# There is a significant difference in alpha diversity between premetamorphic tadpoles and prometamorphic ones (p=0.00), also with Froglets and Adults (p=0) but not with metamorph (p=0.805).
# There is a significant difference in alpha diversity across life stages as measured using the simpson index (p=0)
# Plot the results altogether with the estimate at the sample level.
plot_simpson<-ggplot()+geom_jitter(data=simpson_dv_dev,aes(x=LifeStage,y=estimate,color=LifeStage),width=0.2)+geom_pointrange(data=simpson_dv_dev_ls,aes(x=LifeStage,y=estimate,ymin = lower, ymax = upper,color=LifeStage))+scale_color_manual(values=c("#d73027","#fc8d59","#4d4d4d","#91bfdb","#4575b4"),breaks=c("Premetamorph","Prometamorph","Metamorph","Froglet","Adult"))+xlab("Life stages")+ylab("Simpson diversity estimate")
plot_simpson+scale_x_discrete(limits=c("Premetamorph","Prometamorph","Metamorph","Froglet","Adult"),label=c("Premeta","Prometa","Meta","Froglet","Adult"))+theme_npgray()+guides(color=FALSE)
ggsave(filename = "dev_simpson.pdf",width=12,height=9,units="cm")
# We plot a beta-diversity measure (bray-curtis dissimilarity)
pbeta<-simplifyBeta(dv_dev_ls, physeq.ini, "bray-curtis", "LifeStage") %>%
ggplot(aes(x = interaction(Covar1, Covar2),y = beta_est,color = interaction(Covar1, Covar2))) + geom_point(size=3) +
geom_linerange(aes(ymin = lower, ymax = upper)) +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
xlab("") + ylab("Estimates of Bray-Curtis distance") +
theme_npgray() +
theme(axis.text.x=element_text(angle=25,hjust=1)) +
guides(color=FALSE)
# Ordering along the chronology of life stages
pbeta+scale_x_discrete(limits=c("Premetamorph.Premetamorph","Prometamorph.Premetamorph","Metamorph.Premetamorph","Froglet.Premetamorph","Adult.Premetamorph","Metamorph.Prometamorph","Prometamorph.Froglet","Adult.Prometamorph","Metamorph.Froglet","Adult.Metamorph","Adult.Froglet"),labels=c("Premeta.Premeta","Premeta.Prometa","Premeta.Meta","Premeta.Froglet","Premeta.Adult","Prometa.Meta","Prometa.Froglet","Prometa.Adult","Meta.Froglet","Meta.Adult","Froglet.Adult"))
ggsave(filename = "dev_braycurtis.pdf",width=12,height=9,units="cm")