Xenopus metatetard rarefaction on all samples

# Message and warning turned off for rendering
# Analysis using phyloseq
        library(phyloseq)
        library(ggplot2)
        library(phytools)
        library(dplyr)
        library(parallel)
        library(scales)
        library(ranacapa)
        library(vegan)
## Folders, Themes, colors
source("prelude.R")

# Setting up directories
data_dir <-paste0(data_dir_path,"xpall")
output_dir <- paste0(output_dir_path,"Supp_Figure_RarefactionCurve")

# Importing data
# Defining our data directory
setwd(data_dir)

#Importing abundance data
comm<-read.table("nmt3_abondance_tpn_notax.asc", header=TRUE, row.names=1)
# Getting rid of OTUs with zero abundance
comm<-comm %>% select_if((function(col) is.numeric(col) && sum(col) > 0))
comm<-t(comm)

# Importing metadata
metadata<-read.csv("nmt3_1_metadata.csv",sep=";",header=TRUE,row.names=1)
metadata$XP_Tissue<-as.factor(metadata$XP_Tissue)
metadata$LifeStage_sorted<-factor(metadata$LifeStage,levels=c("Premetamorph","Prometamorph","Metamorph","Froglet","Adult"))

# Importing taxonomy
taxmat<-read.csv("nmt3_Gal18_abondance_tax.csv",sep=";",header=TRUE,row.names=1)
taxmat<-as.matrix(taxmat)
OTU<-otu_table(comm, taxa_are_rows = TRUE)
TAX<-tax_table(taxmat)
sample_data<-sample_data(metadata)
# Importing phylogeny
phy<-read.tree("nmt3_sequences_mpr.tree")
# Rooting the tree using midpoint rooting
phy<-midpoint.root(phy)
# Combining the input data
physeq.ini<-merge_phyloseq(OTU,sample_data,TAX,phy)

# Rarefaction plot
plot_rarefaction<-ggrare(physeq.ini,step=1000,se=TRUE,parallel=TRUE,color="NucleicAcid")+facet_wrap(~XP_Tissue,nrow=2)

plot_rarefaction+ scale_x_continuous(labels = scales::label_number_si())+facet_wrap(~XP_Tissue,nrow=2)+theme_npgray()+theme(legend.position="bottom")

# breakaway
library(tidyverse)
## ── Attaching packages ───────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ tibble  3.0.3     ✓ purrr   0.3.4
## ✓ tidyr   1.1.2     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.5.0
## ── Conflicts ──────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x readr::col_factor() masks scales::col_factor()
## x purrr::discard()    masks scales::discard()
## x dplyr::filter()     masks stats::filter()
## x dplyr::lag()        masks stats::lag()
## x purrr::map()        masks maps::map()
library(breakaway)
library(magrittr)
## 
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
## 
##     set_names
## The following object is masked from 'package:tidyr':
## 
##     extract
library(ggplotify)

# Estimation of richness using breakaway https://adw96.github.io/breakaway/articles/breakaway.html 

# Adult Feces samples
subset_feces<-physeq.ini %>% subset_samples(AdultTissue %in% "Feces")
richness_feces<-subset_feces %>% breakaway
p1<-as.ggplot(plot(richness_feces,physeq=subset_feces,color="NucleicAcid")+facet_wrap(~color,scales="free")+scale_y_sqrt(breaks = c(100,500,1000,2500,5000,7500)))
# I do not dare to compare DNA and RNA since we have only two RNA fecal samples

# Adult Skin 
subset_skin<-physeq.ini %>% subset_samples(AdultTissue %in% "Swab")
richness_skin<-subset_skin %>% breakaway
p2<-as.ggplot(plot(richness_skin,physeq=subset_skin,color="NucleicAcid")+facet_wrap(~color,scales="free")+scale_y_sqrt(breaks = c(100,500,1000,2500,5000,7500,10000)))
p2

# I do not dare to compare DNA and RNA since we have only two RNA skin samples

# Adult Gut tissues
subset_git<-physeq.ini %>% subset_samples(AdultTissue %in% c("Rectum","Stomach","Intestine","Gut"))
richness_git<-subset_git %>% breakaway
p3<-as.ggplot(plot(richness_git,physeq=subset_git,color="AdultTissue",shape="NucleicAcid")+facet_wrap(~color,scales="free")+scale_y_sqrt(breaks = c(100,500,1000,5000,10000,20000)))
p3

# Tadpole's gut
subset_tgit<-physeq.ini %>% subset_samples(XP_Tissue %in% "TadpoleGut")
richness_tgit<-subset_tgit %>% breakaway
p4<-as.ggplot(plot(richness_tgit,physeq=subset_tgit,color="LifeStage",shape="NucleicAcid")+facet_wrap(~color,scales="free")+scale_y_sqrt(breaks = c(100,500,1000,5000,10000,20000)))
p4

#Eggs

subset_eggs<-physeq.ini %>% subset_samples(XP_Tissue %in% "Eggs")
richness_eggs<-subset_eggs %>% breakaway
p5<-as.ggplot(plot(richness_eggs,physeq=subset_eggs,color="LifeStage",shape="NucleicAcid")+scale_y_sqrt(breaks = c(100,500,1000,5000,10000,15000)))
p5

# Food
subset_food<-physeq.ini %>% subset_samples(XP_Tissue %in% "Food")
richness_food<-subset_food %>% breakaway
p6<-as.ggplot(plot(richness_food,physeq=subset_food,shape="NucleicAcid")+scale_y_sqrt(breaks = c(100,500,1000,2500,5000)))
p6

#####################
# Life Stage Series #
#####################

# Subsetting the samples corresponding to the analysis across life stages
subset_lifestages<-physeq.ini %>% subset_samples(AdultTissue %in% "Gut") 

# We look at the relation between observed OTU richness and sequencing depth
observed_c <- sample_richness(subset_lifestages)
dev_observed_depth<-data.frame("observed_richness" = (observed_c %>% summary)$estimate,"depth" = phyloseq::sample_sums(subset_lifestages),"type" = subset_lifestages %>% sample_data %>% get_variable("LifeStage_sorted"))

ggplot(dev_observed_depth,aes(x = depth, y = observed_richness, color = type))+geom_point(size=2,alpha=0.7)+stat_ellipse(type="norm",linetype=2)

# From this plot, we do not observe a correlation between observed richness at the species level and depth of sequencing.

# Rarefaction plot on these life stage samples, and plotting using defined x and y limits for comparison purposes
plot_rarefaction<-ggrare(subset_lifestages,step=1000,se=TRUE,parallel=TRUE)+facet_wrap(~LifeStage_sorted,nrow=5)

plot_rarefaction + scale_x_continuous(labels = scales::label_number_si(),limits=c(0,60000))+theme_npgray()+scale_y_continuous(limits=c(0,600))

ggsave("rare_life_stage.pdf",width=12,height=32.5,units="cm")

#Species accumulation curve across all life stage samples
pdf("specaccum_lifestages.pdf",width=8,height=6)
par(cex=1.5,las=1)
plot(specaccum(vegan_otu(subset_lifestages)),font=2,lwd=2, xlab = "# of samples", ylab = "# of species",main="Species accumulation across life stages" )
dev.off()
## quartz_off_screen 
##                 2
# We have  a correct sampling of the diversity in our life stage samples taken as a whole

# Now let us have a look for each life stage separately
my_life_stages=c("Premetamorph","Prometamorph","Metamorph","Froglet","Adult")
par(mfrow=c(3,2))
for (i in 1:length(my_life_stages)) {
        d<-subset_lifestages %>% subset_samples(LifeStage %in% my_life_stages[i])
  plot(specaccum(vegan_otu(d)),font=2,xlab = "# of samples", ylab = "# of species",main=my_life_stages[i])
}


##########################
# Gut compartment Series #
##########################
subset_adultorgans<-physeq.ini %>% subset_samples(AdultTissue %in% c("Rectum","Stomach","Intestine","Gut","Feces","Swab")) %>% subset_samples(NucleicAcid %in% "DNA") %>% subset_samples(LifeStage %in% "Adult")

observed_c <- sample_richness(subset_adultorgans)
dev_observed_depth<-data.frame("observed_richness" = (observed_c %>% summary)$estimate,"depth" = phyloseq::sample_sums(subset_adultorgans),"type" = subset_adultorgans %>% sample_data %>% get_variable("AdultTissue"))

ggplot(dev_observed_depth,aes(x = depth, y = observed_richness, color = type))+geom_point(size=2,alpha=0.7)+stat_ellipse(type="norm",linetype=2)
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
# From this plot, we do not observe a correlation between observed richness at the species level and depth of sequencing.

#Rarefaction plot on these gut tissue samples 
plot_rarefaction<-ggrare(subset_adultorgans,step=1000,se=TRUE,parallel=TRUE)+facet_wrap(~AdultTissue,nrow=6)
plot_rarefaction+ scale_x_continuous(labels = scales::label_number_si(),limits=c(0,60000))+theme_npgray()+scale_y_continuous(limits=c(0,600))
ggsave("rare_gutcompartment.pdf",width=12,height=39,units="cm")


#Species accumulation curve across gut tissue samples
pdf("specaccum_gutcompartment.pdf",width=8,height=6)
par(cex=1.5,las=1)
plot(specaccum(vegan_otu(subset_adultorgans)), font=2,lwd=2,xlab = "# of samples", ylab = "# of species",main="Species accumulation across adult tissues" )
dev.off()
## quartz_off_screen 
##                 2
#We have  a correct sampling of the diversity across these samples as a whole

my_adult_organs<-c("Rectum","Stomach","Intestine","Gut","Feces","Swab")
par(mfrow=c(3,2))

for (i in 1:length(my_adult_organs)) {
        d<-subset_adultorgans %>% subset_samples(AdultTissue %in% my_adult_organs[i])
  plot(specaccum(vegan_otu(d)),font=2,xlab = "# of samples", ylab = "# of species",main=my_adult_organs[i])
}

###########################
# Early and late activity #
###########################
subset_earlylate<-physeq.ini %>% subset_samples(NucleicAcid %in% "RNA") %>% subset_samples(Class %in% c("Tadpole NF41","Tadpole NF48","Tadpole NF50","Tadpole NF52","Embryo NF30","Tadpole NF52 gut","Tadpole NF56 gut","Tadpole NF60 gut","Tadpole NF63 gut"))

observed_c <- sample_richness(subset_earlylate)
dev_observed_depth<-data.frame("observed_richness" = (observed_c %>% summary)$estimate,"depth" = phyloseq::sample_sums(subset_earlylate),"type" = subset_earlylate %>% sample_data %>% get_variable("LifeStage"))
ggplot(dev_observed_depth,aes(x = depth, y = observed_richness, color = type))+geom_point(size=2,alpha=0.7)+stat_ellipse(type="norm",linetype=2)
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse

#Here we see an effect of sequencing depth on the richness

#Rarefaction plot on these early vs late samples
plot_rarefaction<-ggrare(subset_earlylate,step=1000,se=TRUE,parallel=TRUE)+facet_wrap(~LifeStage,nrow=4)

plot_rarefaction+ scale_x_continuous(labels = scales::label_number_si(),limits=c(0,60000))+theme_npgray()+scale_y_continuous(limits=c(0,600))

ggsave("rare_earlylate.pdf",width=12,height=26,units="cm")

#Species accumulation curve across early and late tadpole samples
pdf("specaccum_earlylate.pdf",width=8,height=6)
par(cex=1.5,las=1)
plot(specaccum(vegan_otu(subset_earlylate)),font=2,lwd=2, xlab = "# of samples", ylab = "# of species",main="Species accumulation across early and late tadpoles" )
dev.off()
## quartz_off_screen 
##                 2
#We have  a correct sampling of the diversity in these samples
my_tadpole_stages=c("Embryo","Premetamorph","Prometamorph","Metamorph")
par(mfrow=c(2,2))
for (i in 1:length(my_tadpole_stages)) {
        d<-subset_earlylate %>% subset_samples(LifeStage %in% my_tadpole_stages[i])
  plot(specaccum(vegan_otu(d)),font=2,xlab = "# of samples", ylab = "# of species",main=my_tadpole_stages[i])
}

###################
# Diet experiment #
###################
subset_diet<-physeq.ini %>% subset_samples(NucleicAcid %in% "RNA") %>% subset_samples(Class %in% c("diet 1","diet 2"))
observed_c <- sample_richness(subset_diet)
dev_observed_depth<-data.frame("observed_richness" = (observed_c %>% summary)$estimate,"depth" = phyloseq::sample_sums(subset_diet),"type" = subset_diet %>% sample_data %>% get_variable("Class"))
ggplot(dev_observed_depth,aes(x = depth, y = observed_richness, color = type))+geom_point(size=2,alpha=0.7)+stat_ellipse(type="norm",linetype=2)

#Here we see an effect of sequencing depth on the richness

#Rarefaction plot on these diet experiment samples 
plot_rarefaction<-ggrare(subset_diet,step=1000,se=TRUE,parallel=TRUE)+facet_wrap(~Class,nrow=2)

plot_rarefaction+ scale_x_continuous(labels = scales::label_number_si(),limits=c(0,60000))+theme_npgray()+scale_y_continuous(limits=c(0,600))

ggsave("rare_diet.pdf",width=12,height=13,units="cm")

#Species accumulation curve across diet experiment samples
pdf("specaccum_diet.pdf",width=8,height=6)
par(cex=1.5,las=1)
plot(specaccum(vegan_otu(subset_diet)),font=2,lwd=2, xlab = "# of samples", ylab = "# of species",main="Species accumulation in tadpoles according to diet" )
dev.off()
## quartz_off_screen 
##                 2
#We have  a correct sampling of the diversity in these samples

my_diet_categories=c("diet 1","diet 2")
par(mfrow=c(2,1))
for (i in 1:length(my_diet_categories)) {
        d<-subset_diet %>% subset_samples(Class %in% my_diet_categories[i])
  plot(specaccum(vegan_otu(d)),font=2,xlab = "# of samples", ylab = "# of species",main=my_diet_categories[i])
}

###########################
# Transmission experiment #
###########################
subset_transmission<-physeq.ini %>% subset_samples(XP_Tissue %in% c("Feces","Skin","Eggs"))
observed_c <- sample_richness(subset_transmission)
dev_observed_depth<-data.frame("observed_richness" = (observed_c %>% summary)$estimate,"depth" = phyloseq::sample_sums(subset_transmission),"type" = subset_transmission %>% sample_data %>% get_variable("XP_Tissue"))

ggplot(dev_observed_depth,aes(x = depth, y = observed_richness, color = type))+geom_point(size=2,alpha=0.7)+stat_ellipse(type="norm",linetype=2)

#The effect of sequencing depth on observed richness is very limited

#Rarefaction plot on these transmission experiment samples 
plot_rarefaction<-ggrare(subset_transmission,step=1000,se=TRUE,parallel=TRUE)+facet_wrap(~XP_Tissue,nrow=3)

plot_rarefaction+ scale_x_continuous(labels = scales::label_number_si(),limits=c(0,60000))+theme_npgray()+scale_y_continuous(limits=c(0,600))

ggsave("rare_transmission.pdf",width=12,height=19.5,units="cm")


#Species accumulation curve across transmission samples
pdf("specaccum_transmission.pdf",width=8,height=6)
par(cex=1.5,las=1)
plot(specaccum(vegan_otu(subset_transmission)), font=2,lwd=2,xlab = "# of samples", ylab = "# of species",main="Species accumulation in transmission samples" )
dev.off()
## quartz_off_screen 
##                 2
#We have  a correct sampling of the diversity in these samples
my_transmission_samples=c("Skin","Feces","Eggs")
par(mfrow=c(2,2))
for (i in 1:length(my_transmission_samples)) {
        d<-subset_transmission %>% subset_samples(XP_Tissue %in% my_transmission_samples[i])
  plot(specaccum(vegan_otu(d)),font=2,xlab = "# of samples", ylab = "# of species",main=my_transmission_samples[i])
}

#Fin