#############################################
#               METATETARD                  #
# Community analysis of OTUs                #
# on developmental serie 16S rRNA GENE data #
# XPDEV                                     #
#############################################
# Analysis using phyloseq and philr
library(phyloseq)
library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(phytools)
## Loading required package: ape
## Loading required package: maps
library(philr)
## Registered S3 method overwritten by 'treeio':
##   method     from
##   root.phylo ape
## Folders, Themes, colors
source("prelude.R")

# Setting up directories
data_dir <-paste0(data_dir_path,"xpdev")
output_dir <- paste0(output_dir_path,"Figure2_ordination")


# Importing data
# Defining the directory containing the data to import
setwd(data_dir)

# Importing abundance data
comm<-read.table("nmt3_abondance_tpn_notax_XP_Dev.asc",header=TRUE,row.names=1)
# Getting rid of OTUs with zero abundance
comm<-comm %>% select_if((function(col) is.numeric(col) && sum(col) > 0))
comm<-t(comm)
#Importing metadata
metadata<-read.csv("nmt3_2_metadata_xp_dev.csv",sep=";",header=TRUE,row.names=1)
taxmat<-read.csv("nmt3_Gal18_abondance_tax.csv",sep=";",header=TRUE,row.names=1)
taxmat<-as.matrix(taxmat)
OTU<-otu_table(comm, taxa_are_rows = TRUE)
TAX<-tax_table(taxmat)
sample_data<-sample_data(metadata)
#Importing phylogeny
phy<-read.tree("nmt3_sequences_mpr.tree")
#Rooting the tree using midpoint rooting
phy<-midpoint.root(phy)
physeq.ini<-merge_phyloseq(OTU,sample_data,TAX,phy)
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
# We filter OTUs with abundance of more than 3 in 5% of samples
MTDEV<-filter_taxa(physeq.ini, function(x) sum(x > 3) > (0.05*length(x)), TRUE)
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
# We use pseudocounts of 1 to compute logs
MTDEV <- transform_sample_counts(MTDEV, function(x) x+1)
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
#Check that phylogeny is rooted
is.rooted(phy_tree(MTDEV))
## [1] TRUE
is.binary.tree(phy_tree(MTDEV))
## [1] TRUE
# Naming tree nodes
phy_tree(MTDEV) <- makeNodeLabel(phy_tree(MTDEV), method="number", prefix='n')
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
# Reformatting OTU table for philr
otu.table <- t(otu_table(MTDEV))
tree <- phy_tree(MTDEV)
metadata <- sample_data(MTDEV)
tax <- tax_table(MTDEV)

# Using philR metric
gp.philr <- philr(otu.table, tree, part.weights='enorm.x.gm.counts',ilr.weights='blw.sqrt')
## Building Sequential Binary Partition from Tree...
## Building Contrast Matrix...
## Transforming the Data...
## Calculating ILR Weights...
#Setting the working directory for output files
setwd(output_dir)

#PCoA Ordination
gp.dist <- dist(gp.philr, method="euclidean")
gp.pcoa <- ordinate(MTDEV, 'PCoA', distance=gp.dist)

# Uncomment for plotting the ordination in black and white
#ploto<-plot_ordination(MTDEV, gp.pcoa, shape='LifeStage') +geom_point(size=3,alpha=0.5,fill="grey")+scale_shape_manual(values=c(15,0,16,1,6))+labs(shape="Stage")
#ploto<-ploto+ggtitle("Community composition")+theme_npgray()+ #stat_ellipse(type="norm",linetype=2)+theme(legend.position="top")
#ploto
#ggsave("PCOA_philr.pdf",width=8,height=6)

plotoc<-plot_ordination(MTDEV, gp.pcoa, color='LifeStage') +geom_point(size=3,alpha=0.5)+scale_color_manual(values=c("#d73027","#fc8d59","#4d4d4d","#91bfdb","#4575b4"),breaks=c("Premetamorph","Prometamorph","Metamorph","Froglet","Adult"))
plotoc<-plotoc+ggtitle("Community composition")+theme_npgray()+ stat_ellipse(type="norm",linetype=2)+theme(legend.position="top")
plotoc

ggsave("PCOA_philr_color.pdf",width=8,height=6)

plotoc+guides(color=FALSE)

ggsave("PCOA_philr_color_nolegend.pdf",width=8,height=6)

#perMANOVA (adonis) analysis
library("picante")
## Loading required package: vegan
## Loading required package: permute
## Loading required package: lattice
## This is vegan 2.5-6
## 
## Attaching package: 'vegan'
## The following object is masked from 'package:phytools':
## 
##     scores
## Loading required package: nlme
## 
## Attaching package: 'nlme'
## The following object is masked from 'package:dplyr':
## 
##     collapse
adonis(gp.dist ~ LifeStage, as(sample_data(MTDEV), "data.frame"))
## 
## Call:
## adonis(formula = gp.dist ~ LifeStage, data = as(sample_data(MTDEV),      "data.frame")) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)    
## LifeStage  4    8783.9 2195.99  17.772 0.71743  0.001 ***
## Residuals 28    3459.7  123.56         0.28257           
## Total     32   12243.7                 1.00000           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Identification of balances to discriminate tadpole/non-tadpole
# Following philr tutorial
sample_data(MTDEV)$tadpole<-factor(get_variable(MTDEV,"LifeStage") %in% c("Premetamorph","Prometamorph"))
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
library("glmnet");
## Loading required package: Matrix
## Loaded glmnet 3.0-2
packageVersion('glmnet')
## [1] '3.0.2'
glmmod <- glmnet(gp.philr, sample_data(MTDEV)$tadpole, alpha=1, family="binomial")
top.coords <- as.matrix(coefficients(glmmod, s=0.2526))
top.coords <- rownames(top.coords)[which(top.coords != 0)]
(top.coords <- top.coords[2:length(top.coords)]) # remove the intercept as a coordinate
## [1] "n129" "n401"
# Naming the balances
tc.names <- sapply(top.coords, function(x) name.balance(tree, tax, x))
tc.names
##                                                n129 
##          "Class_Actinobacteria/Order_Micrococcales" 
##                                                n401 
## "Species_Multi-affiliation/Species_unknown species"
votes <- name.balance(tree, tax, 'n129', return.votes = c('up', 'down'))
votes[[c('up.votes', 'Family')]] # Numerator at Family Level
## votes
##   Micrococcaceae Mycobacteriaceae 
##                2                4
votes[[c('down.votes', 'Family')]] # Denominator at Family Level
## votes
##  Dermatophilaceae Microbacteriaceae 
##                 1                10
votes <- name.balance(tree, tax, 'n401', return.votes = c('up', 'down'))
votes[[c('up.votes', 'Family')]] # Numerator at Family Level
## votes
## Clostridiaceae 1 
##                1
votes[[c('down.votes', 'Family')]] # Denominator at Family Level
## votes
## Clostridiaceae 1 
##                1
# Visualising balances
library(ggtree); packageVersion("ggtree")
## ggtree v2.2.4  For help: https://yulab-smu.github.io/treedata-book/
## 
## If you use ggtree in published research, please cite the most appropriate paper(s):
## 
## - Guangchuang Yu. Using ggtree to visualize data on tree-like structures. Current Protocols in Bioinformatics, 2020, 69:e96. doi:10.1002/cpbi.96
## - Guangchuang Yu, Tommy Tsan-Yuk Lam, Huachen Zhu, Yi Guan. Two methods for mapping and visualizing associated data on phylogeny using ggtree. Molecular Biology and Evolution 2018, 35(12):3041-3043. doi:10.1093/molbev/msy194
## - Guangchuang Yu, David Smith, Huachen Zhu, Yi Guan, Tommy Tsan-Yuk Lam. ggtree: an R package for visualization and annotation of phylogenetic trees with their covariates and other associated data. Methods in Ecology and Evolution 2017, 8(1):28-36. doi:10.1111/2041-210X.12628
## 
## Attaching package: 'ggtree'
## The following object is masked from 'package:Matrix':
## 
##     expand
## The following object is masked from 'package:nlme':
## 
##     collapse
## The following object is masked from 'package:ape':
## 
##     rotate
## [1] '2.2.4'
library(dplyr); packageVersion('dplyr')
## [1] '1.0.2'
tc.nn <- name.to.nn(tree, top.coords)
tc.colors <- c('#a6cee3', '#1f78b4')

p <- ggtree(tree, layout='fan') +
   geom_balance(node=tc.nn[1], fill=tc.colors[1], alpha=0.6) +
   geom_balance(node=tc.nn[2], fill=tc.colors[4], alpha=0.6)
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
p <- annotate_balance(tree, 'n129', p=p, labels = c('n129+', 'n129-'),
                  offset.text=0.15, bar=FALSE)

p <- annotate_balance(tree, 'n401', p=p, labels = c('n401+', 'n401-'),
                  offset.text=0.15, bar=FALSE)
p

ggsave("Tree_balance_tadpole_dev_philr.pdf",width=8,height=8)


gp.philr.long <- convert_to_long(gp.philr, get_variable(MTDEV, 'tadpole')) %>%
   filter(coord %in% top.coords)

ggplot(gp.philr.long, aes(x=labels, y=value)) +
geom_boxplot(fill='lightgrey') +
facet_grid(.~coord, scales='free_x') +
xlab('Tadpole') + ylab('Balance Value') +
theme_bw()

ggsave("Barplot_balance_tadpole_dev_philr.pdf",width=8,height=8)


library(tidyr); packageVersion('tidyr')
## 
## Attaching package: 'tidyr'
## The following object is masked from 'package:ggtree':
## 
##     expand
## The following objects are masked from 'package:Matrix':
## 
##     expand, pack, unpack
## [1] '1.1.2'
gp.philr.long %>%
rename(Tadpole=labels) %>%
filter(coord %in% c('n129', 'n401')) %>%
spread(coord, value) %>%
ggplot(aes(x=n129, y=n401, color=Tadpole)) +
geom_point(size=4) +
xlab(tc.names['n129']) + ylab(tc.names['n401']) +
theme_bw()

###############################################################
#Identification of balances to discriminate adult/non-adult
# Following philr tutorial
sample_data(MTDEV)$adult<-factor(get_variable(MTDEV,"LifeStage") %in% c("Adult","Froglet"))
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
library(glmnet); packageVersion('glmnet')
## [1] '3.0.2'
glmmod <- glmnet(gp.philr, sample_data(MTDEV)$adult, alpha=1, family="binomial")
top.coords <- as.matrix(coefficients(glmmod, s=0.2526))
top.coords <- rownames(top.coords)[which(top.coords != 0)]
(top.coords <- top.coords[2:length(top.coords)]) # remove the intercept as a coordinate
## [1] "n94"  "n124" "n415" "n416"
# Dénomination des balances
tc.names <- sapply(top.coords, function(x) name.balance(tree, tax, x))
tc.names
##                                                 n94 
##                   "Domain_Bacteria/Domain_Bacteria" 
##                                                n124 
##                 "Domain_Bacteria/Phylum_Firmicutes" 
##                                                n415 
## "Species_Multi-affiliation/Species_unknown species" 
##                                                n416 
##   "Family_Peptostreptococcaceae/Family_Family XIII"
votes <- name.balance(tree, tax, 'n94', return.votes = c('up', 'down'))
votes[[c('up.votes', 'Family')]] # Numerator at Family Level
## votes
## Erysipelotrichaceae    Mycoplasmataceae     Spirochaetaceae      unknown family 
##                  17                   1                   1                   8
votes[[c('down.votes', 'Family')]] # Denominator at Family Level
## votes
##                Acidaminococcaceae                    Aeromonadaceae 
##                                 1                                 1 
##                    Alcaligenaceae                 Bradyrhizobiaceae 
##                                 1                                 3 
##                  Burkholderiaceae                  Caulobacteraceae 
##                                 4                                 1 
##                  Cellvibrionaceae               Christensenellaceae 
##                                 1                                 4 
##                     Chromatiaceae Chthoniobacterales Incertae Sedis 
##                                 5                                 1 
##                  Clostridiaceae 1     Clostridiales vadinBB60 group 
##                                20                                 2 
##                    Comamonadaceae                 Coriobacteriaceae 
##                                18                                 1 
##                Deferribacteraceae                  Dermatophilaceae 
##                                 1                                 1 
##               Desulfovibrionaceae                Enterobacteriaceae 
##                                25                                 3 
##                   Enterococcaceae               Erysipelotrichaceae 
##                                 3                                 1 
##                Erythrobacteraceae                    Eubacteriaceae 
##                                 1                                 2 
##                        Family XII                       Family XIII 
##                                 2                                10 
##                  Fibrobacteraceae                    Halomonadaceae 
##                                 1                                 1 
##                Herpetosiphonaceae                 Hyphomicrobiaceae 
##                                 1                                 1 
##                   Lachnospiraceae               Methylobacteriaceae 
##                                45                                 1 
##                  Methylocystaceae                 Microbacteriaceae 
##                                 2                                10 
##                    Micrococcaceae                     Moraxellaceae 
##                                 2                                 4 
##                 Multi-affiliation                  Mycobacteriaceae 
##                                10                                 4 
##                     Neisseriaceae                 Nitrosomonadaceae 
##                                 3                                 1 
##                    Nitrospiraceae                             OPB56 
##                                 1                                 1 
##                  Oxalobacteraceae             Peptostreptococcaceae 
##                                 2                                 3 
##                    Planococcaceae                  Pseudomonadaceae 
##                                 3                                 8 
##                      Rhizobiaceae                  Rhodobacteraceae 
##                                 2                                 9 
##                    Rhodocyclaceae                 Rhodospirillaceae 
##                                 8                                13 
##   Rhodospirillales Incertae Sedis      Rickettsiales Incertae Sedis 
##                                 1                                 1 
##                   Ruminococcaceae                    Shewanellaceae 
##                                76                                 1 
##                 Sphingomonadaceae                  Streptococcaceae 
##                                 6                                 2 
##                    Synergistaceae                    unknown family 
##                                 2                                 4 
##               Verrucomicrobiaceae                  Xanthomonadaceae 
##                                 7                                 4
votes <- name.balance(tree, tax, 'n124', return.votes = c('up', 'down'))
votes[[c('up.votes', 'Family')]] # Numerator at Family Level
## votes
##                    Aeromonadaceae                    Alcaligenaceae 
##                                 1                                 1 
##                 Bradyrhizobiaceae                  Burkholderiaceae 
##                                 3                                 4 
##                  Caulobacteraceae                  Cellvibrionaceae 
##                                 1                                 1 
##                     Chromatiaceae Chthoniobacterales Incertae Sedis 
##                                 5                                 1 
##                    Comamonadaceae                 Coriobacteriaceae 
##                                18                                 1 
##                  Dermatophilaceae               Desulfovibrionaceae 
##                                 1                                25 
##                Enterobacteriaceae                Erythrobacteraceae 
##                                 3                                 1 
##                  Fibrobacteraceae                    Halomonadaceae 
##                                 1                                 1 
##                Herpetosiphonaceae                 Hyphomicrobiaceae 
##                                 1                                 1 
##               Methylobacteriaceae                  Methylocystaceae 
##                                 1                                 2 
##                 Microbacteriaceae                    Micrococcaceae 
##                                10                                 2 
##                     Moraxellaceae                 Multi-affiliation 
##                                 4                                 8 
##                  Mycobacteriaceae                     Neisseriaceae 
##                                 4                                 3 
##                 Nitrosomonadaceae                    Nitrospiraceae 
##                                 1                                 1 
##                             OPB56                  Oxalobacteraceae 
##                                 1                                 2 
##                  Pseudomonadaceae                      Rhizobiaceae 
##                                 8                                 2 
##                  Rhodobacteraceae                    Rhodocyclaceae 
##                                 9                                 8 
##                 Rhodospirillaceae   Rhodospirillales Incertae Sedis 
##                                13                                 1 
##      Rickettsiales Incertae Sedis                   Ruminococcaceae 
##                                 1                                 1 
##                    Shewanellaceae                 Sphingomonadaceae 
##                                 1                                 6 
##                    unknown family               Verrucomicrobiaceae 
##                                 4                                 7 
##                  Xanthomonadaceae 
##                                 4
votes[[c('down.votes', 'Family')]] # Denominator at Family Level
## votes
##            Acidaminococcaceae           Christensenellaceae 
##                             1                             4 
##              Clostridiaceae 1 Clostridiales vadinBB60 group 
##                            20                             2 
##               Enterococcaceae           Erysipelotrichaceae 
##                             3                             1 
##                Eubacteriaceae                    Family XII 
##                             2                             2 
##                   Family XIII               Lachnospiraceae 
##                            10                            45 
##             Multi-affiliation         Peptostreptococcaceae 
##                             2                             3 
##                Planococcaceae               Ruminococcaceae 
##                             3                            75 
##              Streptococcaceae 
##                             2
votes <- name.balance(tree, tax, 'n415', return.votes = c('up', 'down'))
votes[[c('up.votes', 'Family')]] # Numerator at Family Level
## votes
## Clostridiaceae 1 
##                1
votes[[c('down.votes', 'Family')]] # Denominator at Family Level
## votes
## Clostridiaceae 1 
##                1
votes <- name.balance(tree, tax, 'n416', return.votes = c('up', 'down'))
votes[[c('up.votes', 'Family')]] # Numerator at Family Level
## votes
## Peptostreptococcaceae 
##                     3
votes[[c('down.votes', 'Family')]] # Denominator at Family Level
## votes
## Family XIII 
##          10
# Visualisation des balances
library(ggtree); packageVersion("ggtree")
## [1] '2.2.4'
library(dplyr); packageVersion('dplyr')
## [1] '1.0.2'
tc.nn <- name.to.nn(tree, top.coords)
tc.colors <- c('#a6cee3', '#1f78b4','#b2df8a', '#33a02c')

p <- ggtree(tree, layout='fan') +
geom_balance(node=tc.nn[1], fill=tc.colors[1], alpha=0.6) +
geom_balance(node=tc.nn[2], fill=tc.colors[2], alpha=0.6) +
geom_balance(node=tc.nn[3], fill=tc.colors[3], alpha=0.6) +
geom_balance(node=tc.nn[4], fill=tc.colors[4], alpha=0.6)
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
p <- annotate_balance(tree, 'n94', p=p, labels = c('n94+', 'n94-'),
offset.text=0.15, bar=FALSE)

p <- annotate_balance(tree, 'n124', p=p, labels = c('n124+', 'n124-'),
offset.text=0.15, bar=FALSE)

p <- annotate_balance(tree, 'n415', p=p, labels = c('n415+', 'n415-'),
offset.text=0.15, bar=FALSE)

p <- annotate_balance(tree, 'n416', p=p, labels = c('n416+', 'n416-'),
offset.text=0.15, bar=FALSE)


p

ggsave("Tree_balance_adult_dev_philr.pdf",width=8,height=8)


gp.philr.long <- convert_to_long(gp.philr, get_variable(MTDEV, 'adult')) %>%
filter(coord %in% top.coords)

ggplot(gp.philr.long, aes(x=labels, y=value)) +
geom_boxplot(fill='lightgrey') +
facet_grid(.~coord, scales='free_x') +
xlab('Adult') + ylab('Balance Value') +
theme_bw()

ggsave("Barplot_balance_adult_dev_philr.pdf",width=8,height=8)


library(tidyr); packageVersion('tidyr')
## [1] '1.1.2'
gp.philr.long %>%
rename(Adult=labels) %>%
filter(coord %in% c('n124', 'n94')) %>%
spread(coord, value) %>%
ggplot(aes(x=n124, y=n94, color=Adult)) +
geom_point(size=4) +
xlab(tc.names['n124']) + ylab(tc.names['n94']) +
theme_bw()