#############################################
#               METATETARD                  #
# Community analysis of OTUs                #
# on developmental serie 16S rRNA GENE data #
# XPDEV                                     #
#############################################
###################################################################
# Prevalence analysis on Xenopus developmental serie 16S DNA data #
###################################################################

library(microbiome)
## Loading required package: phyloseq
## Loading required package: ggplot2
## 
## microbiome R package (microbiome.github.com)
##     
## 
## 
##  Copyright (C) 2011-2020 Leo Lahti, 
##     Sudarshan Shetty et al. <microbiome.github.io>
## 
## Attaching package: 'microbiome'
## The following object is masked from 'package:ggplot2':
## 
##     alpha
## The following object is masked from 'package:base':
## 
##     transform
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(phytools)
## Loading required package: ape
## Loading required package: maps
library(viridis)
## Loading required package: viridisLite
library(RColorBrewer)

## Folders, Themes, colors
source("prelude.R")

# Setting up directories
data_dir <-paste0(data_dir_path,"xpdev")
output_dir <- paste0(output_dir_path,"Supp_Figure_2")


# Importing data
# Defining the directory containing the data to import
setwd(data_dir)
# Importing abundance data
comm<-read.table("nmt3_abondance_tpn_notax_XP_Dev.asc", header=TRUE, row.names=1)
# Getting rid of OTUs with zero abundance
comm<-comm %>% select_if((function(col) is.numeric(col) && sum(col) > 0))
comm<-t(comm)
# Importing metadata
metadata<-read.csv("nmt3_2_metadata_xp_dev.csv",sep=";",header=TRUE,row.names=1)
taxmat<-read.csv("nmt3_Gal18_abondance_tax.csv",sep=";",header=TRUE,row.names=1)
taxmat<-as.matrix(taxmat)
OTU = otu_table(comm, taxa_are_rows = TRUE)
TAX = tax_table(taxmat)
sample_data = sample_data(metadata)
# Importing phylogeny
phy<-read.tree("nmt3_sequences_mpr.tree")
# Rooting the tree using midpoint rooting
phy<-midpoint.root(phy)
physeq.ini = merge_phyloseq(OTU,sample_data,TAX,phy)

# Transformation to relative abundance
physeq.rel <- microbiome::transform(physeq.ini, "compositional")


#############################################################################################################
# We determine the size of the core microbiome using a set of abundance and prevalence thresholds           #
# see Salonen et al. CMI, 2012                                                                               #
#############################################################################################################

# Core line plot
det <- c(0, 0.1, 0.5, 2, 5, 20)/100
prevalences <- seq(.05, 1, .05)
plot_core(physeq.rel, prevalences = prevalences, detections = det, plot.type = "lineplot") + xlab("Relative Abundance in %") + theme_npgray()
## Warning: `legend.margin` must be specified using `margin()`. For the old
## behavior use legend.spacing
## Warning: Transformation introduced infinite values in continuous x-axis

## Warning: Transformation introduced infinite values in continuous x-axis

#################################################
# Core microbiome visualization using heatmaps  #
# Core microbiome definition  :                 #
# with 50% minimal prevalence and minimum       #
# detection threshold of 0.001 %                #
#################################################

########################################################################
#      50% minimal prevalence and minimum detection threshold of 0.001 #
########################################################################
physeq1.core <- core(physeq.rel, detection = 0.001, prevalence = .5)

# Fetching taxa names corresponding to the core microbiota OTUs
core.taxa <- taxa(physeq1.core)

# Fetching taxonomy  corresponding to the core microbiota OTUs
tax.mat <- tax_table(physeq1.core)
tax.df <- as.data.frame(tax.mat)

# Adding OTU identifiers
tax.df$OTU <- rownames(tax.df)

# select taxonomy of only
# those OTUs that are core memebers based on the thresholds that were used.
core.taxa.class <- dplyr::filter(tax.df, rownames(tax.df) %in% core.taxa)

# Saving a file containing the list of OTUs and their taxonomy
setwd(output_dir)
write.table(core.taxa.class,file="core1_dev_taxa_otu.csv",sep=";",row.names=TRUE,col.names=TRUE)

# Selection of core microbiota OTUs in the starting physeq.ini
my_subset <- subset(otu_table(physeq.ini), rownames(otu_table(physeq.ini)) %in% tax.df$OTU)
physeq.core.ini <- merge_phyloseq(my_subset, tax_table(physeq.ini), sample_data(physeq.ini),phy_tree(physeq.ini))


##########################
# Plotting the heat map  #
##########################

# Defining the output dir
setwd(output_dir)

prevalences <- seq(.05, 1, .05)
detections <- 10^seq(log10(1e-3), log10(.2), length = 20)

# Graphique en heatmap

plot_core(physeq.rel, plot.type = "heatmap", prevalences = prevalences, detections = detections, min.prevalence = .5,colours = rev(brewer.pal(5, "Spectral")),)+theme_npgray()
## Warning: `legend.margin` must be specified using `margin()`. For the old
## behavior use legend.spacing

ggsave("core_xpdev_heatmap_prev_05_raw.pdf",width=8,height=6)

# I fetched the list of OTUs and their order from the pdf of plot_core
# Then I replace OTUs IDs by a more informative taxonomic name
# A partir du pdf du plot_core sans modification des légendes y (:-o) et je le renseigne dans le fichier core1_taxa_otu.xlsx pour remplacer les noms d'OTU par un nom taxonomique
# Je récupère la liste des OTUs et l'ordre dans lequel ils sont affichés dans le plot brut à partir du core_xpdev_heatmap_prev_05_raw.pdf (sans modification des légendes y (:-o)) et je le renseigne dans le fichier core1_dev_taxa_otu.xlsx pour remplacer les noms d'OTU par un nom taxonomique

plot_core(physeq.rel, plot.type = "heatmap", prevalences = prevalences, detections = detections, min.prevalence = .5,colours = rev(brewer.pal(5, "Spectral")),)+theme_npgray()+scale_x_discrete(breaks=detections,labels=c("0.001","","","","","","0.005","","0.010","","0.016","","","","0.05","","","","",""),name="Detection threshold in % of relative abundance")+scale_y_discrete(labels=c("Desulfovibrio sp","Hydrogenoanaerobacterium sp","Lachnospiraceae NK4A136 group sp","Rhizobiales sp","Hydrogenoanaerobacterium sp","Clostridium sensu stricto 1 sp","Microbacteriaceae bacterium MAEY21","Desulfovibrio sp","Desulfovibrio sp","Alpha proteobacterium UT-4")) + theme(legend.text=element_text(size=10))
## Warning: `legend.margin` must be specified using `margin()`. For the old
## behavior use legend.spacing

ggsave("core_xpdev_heatmap_prev_05.pdf",width=8,height=6)


plot_heatmap(physeq.core.ini,sample.label="Description",low="#66CCFF", high="#000033", na.value="white")+scale_x_discrete(limits=c(
"XT1A4G","XT1A5G","XT1B2G","XT1B2Gbis","XT1B4G","XT1B5G","XT1A2G","XT1B1G","XTLS13","XTLS14","XTLS15","XTLS16","XTLS17","XTLS12","XTLS2","XTLS3","XTLS4","XTLS5","XTLS6","XTLS1","XTLS10","XTLS11","XTLS7","XTLS8","XTLS9","XTLSM1","XTLSM2","XTLSM3","XTLTW2M","XTLTW3F","XTLTW3M","XTLTW4F","XTLTW5F"))+theme_npgray()+ theme(axis.text.x = element_text(angle = 45, hjust = 1))
## Warning in psmelt(physeq): The sample variables: 
## Sample
##  have been renamed to: 
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
## Warning in psmelt(physeq): The sample variables: 
## Species
##  have been renamed to: 
## sample_Species
## to avoid conflicts with taxonomic rank names.
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.
## Warning: `legend.margin` must be specified using `margin()`. For the old
## behavior use legend.spacing
## Warning: Transformation introduced infinite values in discrete y-axis

ggsave("core_xpdev_heatmap_abo_1.pdf",width=8,height=6)
## Warning: Transformation introduced infinite values in discrete y-axis
# Same graph with OTUs in the same order as the prevalence plot, and grouped by developmental stages
plot_heatmap(physeq.core.ini,sample.label="Description",low="#66CCFF", high="#000033", na.value="white")+scale_x_discrete(limits=c(
"XT1A4G","XT1A5G","XT1B2G","XT1B2Gbis","XT1B4G","XT1B5G","XT1A2G","XT1B1G","XTLS13","XTLS14","XTLS15","XTLS16","XTLS17","XTLS12","XTLS2","XTLS3","XTLS4","XTLS5","XTLS6","XTLS1","XTLS10","XTLS11","XTLS7","XTLS8","XTLS9","XTLSM1","XTLSM2","XTLSM3","XTLTW2M","XTLTW3F","XTLTW3M","XTLTW4F","XTLTW5F"))+scale_y_discrete(limits=c("Cluster_53","Cluster_75","Cluster_70","Cluster_6","Cluster_41","Cluster_11","Cluster_2","Cluster_3","Cluster_7","Cluster_1"))+theme_npgray()+ theme(axis.text.x = element_text(angle = 45, hjust = 1))
## Warning in psmelt(physeq): The sample variables: 
## Sample
##  have been renamed to: 
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
## Warning in psmelt(physeq): The sample variables: 
## Species
##  have been renamed to: 
## sample_Species
## to avoid conflicts with taxonomic rank names.
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.
## Warning: `legend.margin` must be specified using `margin()`. For the old
## behavior use legend.spacing
## Warning: Transformation introduced infinite values in discrete y-axis

ggsave("core_xpdev_heatmap_abo_2.pdf",width=8,height=6)
## Warning: Transformation introduced infinite values in discrete y-axis
# Same graph with OTUs changed to their taxonomy
plot_heatmap(physeq.core.ini,sample.label="Description",low="#66CCFF", high="#000033", na.value="white")+scale_x_discrete(limits=c("XT1A4G",
"XT1A5G","XT1B2G","XT1B2Gbis","XT1B4G","XT1B5G","XT1A2G","XT1B1G","XTLS13","XTLS14","XTLS15","XTLS16","XTLS17","XTLS12","XTLS2","XTLS3","XTLS4","XTLS5","XTLS6","XTLS1","XTLS10","XTLS11","XTLS7","XTLS8","XTLS9","XTLSM1","XTLSM2","XTLSM3","XTLTW2M","XTLTW3F","XTLTW3M","XTLTW4F","XTLTW5F"),labels=c("NF55","NF55","NF55","NF55","NF55","NF55","NF55","NF56","NF57","NF58","NF58","NF58","NF58","NF61","NF62","NF62","NF62","NF62","NF62","NF66","NF66","NF66","NF66","NF66","NF66","Adult","Adult","Adult","Adult","Adult","Adult","Adult","Adult"))+scale_y_discrete(limits=c("Cluster_53","Cluster_75","Cluster_70","Cluster_6","Cluster_41","Cluster_11","Cluster_2","Cluster_3","Cluster_7","Cluster_1"),labels=c("Desulfovibrio sp","Hydrogenoanaerobacterium sp","Lachnospiraceae NK4A136 group sp","Rhizobiales sp","Hydrogenoanaerobacterium sp","Clostridium sensu stricto 1 sp","Microbacteriaceae bacterium MAEY21","Desulfovibrio sp","Desulfovibrio sp","alpha proteobacterium UT-4"))+theme_npgray()+ theme(axis.text.x = element_text(angle = 45, hjust = 1))
## Warning in psmelt(physeq): The sample variables: 
## Sample
##  have been renamed to: 
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
## Warning in psmelt(physeq): The sample variables: 
## Species
##  have been renamed to: 
## sample_Species
## to avoid conflicts with taxonomic rank names.
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.
## Warning: `legend.margin` must be specified using `margin()`. For the old
## behavior use legend.spacing
## Warning: Transformation introduced infinite values in discrete y-axis

ggsave("core_xpdev_heatmap_abo_3.pdf",width=8,height=6)
## Warning: Transformation introduced infinite values in discrete y-axis
##################################################################################
#   Core with 50% minimal prevalence and minimum detection threshold of 0.00001  #
##################################################################################
physeq2.core <- core(physeq.rel, detection = 0.00001, prevalence = .5)

#Fetching taxa names corresponding to the core microbiota OTUs
core.taxa <- taxa(physeq2.core)

#Fetching taxonomy corresponding to the core microbiota OTUs
tax.mat <- tax_table(physeq2.core)
tax.df <- as.data.frame(tax.mat)

# Adding OTU identifiers
tax.df$OTU <- rownames(tax.df)

# select taxonomy of only
# those OTUs that are core memebers based on the thresholds that were used.
core.taxa.class <- dplyr::filter(tax.df, rownames(tax.df) %in% core.taxa)

# Save a file with OTU IDs and taxonomy
setwd(output_dir)
write.table(core.taxa.class,file="core2_dev_taxa_otu.csv",sep=";",row.names=TRUE,col.names=TRUE)

# Select OTUs from core microbiota in physeq.ini
#
my_subset <- subset(otu_table(physeq.ini), rownames(otu_table(physeq.ini)) %in% tax.df$OTU)
physeq2.core.ini <- merge_phyloseq(my_subset, tax_table(physeq.ini), sample_data(physeq.ini),phy_tree(physeq.ini))


# Heatmap with a much lower abundance thresholds
prevalences <- seq(.1, 1, .1)
detections <- 10^seq(log10(1e-5), log10(.2), length = 20)

plot_core(physeq.rel, plot.type = "heatmap", prevalences = prevalences, detections = detections, min.prevalence = .5,colours = rev(brewer.pal(5, "Spectral")),)+scale_x_discrete(breaks=detections,labels=c("1e-5","","","5e-5","","1e-4","","","","1e-3","","","","","0.015","","","","",""),name="Detection threshold in % of relative abundance")

# Same plot with taxonomy insteat of OTU_ID
plot_heatmap(physeq2.core.ini,sample.label="Description",low="#66CCFF", high="#000033",na.value="white")+scale_x_discrete(limits=c("XT1A4G","XT1A5G","XT1B2G","XT1B2Gbis","XT1B4G","XT1B5G","XT1A2G","XT1B1G","XTLS13","XTLS14","XTLS15","XTLS16","XTLS17","XTLS12","XTLS2","XTLS3","XTLS4","XTLS5","XTLS6","XTLS1","XTLS10","XTLS11","XTLS7","XTLS8","XTLS9","XTLSM1","XTLSM2","XTLSM3","XTLTW2M","XTLTW3F","XTLTW3M","XTLTW4F","XTLTW5F"),labels=c("NF55","NF55","NF55","NF55","NF55","NF55","NF56","NF56","NF57","NF58","NF58","NF58","NF58","NF61","NF62","NF62","NF62","NF62","NF62","NF66","NF66","NF66","NF66","NF66","NF66","Adult","Adult","Adult","Adult","Adult","Adult","Adult","Adult"))
## Warning in psmelt(physeq): The sample variables: 
## Sample
##  have been renamed to: 
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
## Warning in psmelt(physeq): The sample variables: 
## Species
##  have been renamed to: 
## sample_Species
## to avoid conflicts with taxonomic rank names.
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.
## Warning: Transformation introduced infinite values in discrete y-axis