#############################################
# METATETARD #
# Community analysis of OTUs #
# on developmental serie 16S rRNA GENE data #
# XPDEV #
#############################################
# Analysis using phyloseq and philr
library(phyloseq)
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(phytools)
## Loading required package: ape
## Loading required package: maps
library(philr)
## Registered S3 method overwritten by 'treeio':
## method from
## root.phylo ape
## Folders, Themes, colors
source("prelude.R")
# Setting up directories
data_dir <-paste0(data_dir_path,"xpdev")
output_dir <- paste0(output_dir_path,"Figure2_ordination")
# Importing data
# Defining the directory containing the data to import
setwd(data_dir)
# Importing abundance data
comm<-read.table("nmt3_abondance_tpn_notax_XP_Dev.asc",header=TRUE,row.names=1)
# Getting rid of OTUs with zero abundance
comm<-comm %>% select_if((function(col) is.numeric(col) && sum(col) > 0))
comm<-t(comm)
#Importing metadata
metadata<-read.csv("nmt3_2_metadata_xp_dev.csv",sep=";",header=TRUE,row.names=1)
taxmat<-read.csv("nmt3_Gal18_abondance_tax.csv",sep=";",header=TRUE,row.names=1)
taxmat<-as.matrix(taxmat)
OTU<-otu_table(comm, taxa_are_rows = TRUE)
TAX<-tax_table(taxmat)
sample_data<-sample_data(metadata)
#Importing phylogeny
phy<-read.tree("nmt3_sequences_mpr.tree")
#Rooting the tree using midpoint rooting
phy<-midpoint.root(phy)
physeq.ini<-merge_phyloseq(OTU,sample_data,TAX,phy)
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
# We filter OTUs with abundance of more than 3 in 5% of samples
MTDEV<-filter_taxa(physeq.ini, function(x) sum(x > 3) > (0.05*length(x)), TRUE)
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
# We use pseudocounts of 1 to compute logs
MTDEV <- transform_sample_counts(MTDEV, function(x) x+1)
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
#Check that phylogeny is rooted
is.rooted(phy_tree(MTDEV))
## [1] TRUE
is.binary.tree(phy_tree(MTDEV))
## [1] TRUE
# Naming tree nodes
phy_tree(MTDEV) <- makeNodeLabel(phy_tree(MTDEV), method="number", prefix='n')
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
# Reformatting OTU table for philr
otu.table <- t(otu_table(MTDEV))
tree <- phy_tree(MTDEV)
metadata <- sample_data(MTDEV)
tax <- tax_table(MTDEV)
# Using philR metric
gp.philr <- philr(otu.table, tree, part.weights='enorm.x.gm.counts',ilr.weights='blw.sqrt')
## Building Sequential Binary Partition from Tree...
## Building Contrast Matrix...
## Transforming the Data...
## Calculating ILR Weights...
#Setting the working directory for output files
setwd(output_dir)
#PCoA Ordination
gp.dist <- dist(gp.philr, method="euclidean")
gp.pcoa <- ordinate(MTDEV, 'PCoA', distance=gp.dist)
# Uncomment for plotting the ordination in black and white
#ploto<-plot_ordination(MTDEV, gp.pcoa, shape='LifeStage') +geom_point(size=3,alpha=0.5,fill="grey")+scale_shape_manual(values=c(15,0,16,1,6))+labs(shape="Stage")
#ploto<-ploto+ggtitle("Community composition")+theme_npgray()+ #stat_ellipse(type="norm",linetype=2)+theme(legend.position="top")
#ploto
#ggsave("PCOA_philr.pdf",width=8,height=6)
plotoc<-plot_ordination(MTDEV, gp.pcoa, color='LifeStage') +geom_point(size=3,alpha=0.5)+scale_color_manual(values=c("#d73027","#fc8d59","#4d4d4d","#91bfdb","#4575b4"),breaks=c("Premetamorph","Prometamorph","Metamorph","Froglet","Adult"))
plotoc<-plotoc+ggtitle("Community composition")+theme_npgray()+ stat_ellipse(type="norm",linetype=2)+theme(legend.position="top")
plotoc
ggsave("PCOA_philr_color.pdf",width=8,height=6)
plotoc+guides(color=FALSE)
ggsave("PCOA_philr_color_nolegend.pdf",width=8,height=6)
#perMANOVA (adonis) analysis
library("picante")
## Loading required package: vegan
## Loading required package: permute
## Loading required package: lattice
## This is vegan 2.5-6
##
## Attaching package: 'vegan'
## The following object is masked from 'package:phytools':
##
## scores
## Loading required package: nlme
##
## Attaching package: 'nlme'
## The following object is masked from 'package:dplyr':
##
## collapse
adonis(gp.dist ~ LifeStage, as(sample_data(MTDEV), "data.frame"))
##
## Call:
## adonis(formula = gp.dist ~ LifeStage, data = as(sample_data(MTDEV), "data.frame"))
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## LifeStage 4 8783.9 2195.99 17.772 0.71743 0.001 ***
## Residuals 28 3459.7 123.56 0.28257
## Total 32 12243.7 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Identification of balances to discriminate tadpole/non-tadpole
# Following philr tutorial
sample_data(MTDEV)$tadpole<-factor(get_variable(MTDEV,"LifeStage") %in% c("Premetamorph","Prometamorph"))
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
library("glmnet");
## Loading required package: Matrix
## Loaded glmnet 3.0-2
packageVersion('glmnet')
## [1] '3.0.2'
glmmod <- glmnet(gp.philr, sample_data(MTDEV)$tadpole, alpha=1, family="binomial")
top.coords <- as.matrix(coefficients(glmmod, s=0.2526))
top.coords <- rownames(top.coords)[which(top.coords != 0)]
(top.coords <- top.coords[2:length(top.coords)]) # remove the intercept as a coordinate
## [1] "n129" "n401"
# Naming the balances
tc.names <- sapply(top.coords, function(x) name.balance(tree, tax, x))
tc.names
## n129
## "Class_Actinobacteria/Order_Micrococcales"
## n401
## "Species_Multi-affiliation/Species_unknown species"
votes <- name.balance(tree, tax, 'n129', return.votes = c('up', 'down'))
votes[[c('up.votes', 'Family')]] # Numerator at Family Level
## votes
## Micrococcaceae Mycobacteriaceae
## 2 4
votes[[c('down.votes', 'Family')]] # Denominator at Family Level
## votes
## Dermatophilaceae Microbacteriaceae
## 1 10
votes <- name.balance(tree, tax, 'n401', return.votes = c('up', 'down'))
votes[[c('up.votes', 'Family')]] # Numerator at Family Level
## votes
## Clostridiaceae 1
## 1
votes[[c('down.votes', 'Family')]] # Denominator at Family Level
## votes
## Clostridiaceae 1
## 1
# Visualising balances
library(ggtree); packageVersion("ggtree")
## ggtree v2.2.4 For help: https://yulab-smu.github.io/treedata-book/
##
## If you use ggtree in published research, please cite the most appropriate paper(s):
##
## [36m-[39m Guangchuang Yu. Using ggtree to visualize data on tree-like structures. Current Protocols in Bioinformatics, 2020, 69:e96. doi:10.1002/cpbi.96
## [36m-[39m Guangchuang Yu, Tommy Tsan-Yuk Lam, Huachen Zhu, Yi Guan. Two methods for mapping and visualizing associated data on phylogeny using ggtree. Molecular Biology and Evolution 2018, 35(12):3041-3043. doi:10.1093/molbev/msy194
## [36m-[39m Guangchuang Yu, David Smith, Huachen Zhu, Yi Guan, Tommy Tsan-Yuk Lam. ggtree: an R package for visualization and annotation of phylogenetic trees with their covariates and other associated data. Methods in Ecology and Evolution 2017, 8(1):28-36. doi:10.1111/2041-210X.12628
##
## Attaching package: 'ggtree'
## The following object is masked from 'package:Matrix':
##
## expand
## The following object is masked from 'package:nlme':
##
## collapse
## The following object is masked from 'package:ape':
##
## rotate
## [1] '2.2.4'
library(dplyr); packageVersion('dplyr')
## [1] '1.0.2'
tc.nn <- name.to.nn(tree, top.coords)
tc.colors <- c('#a6cee3', '#1f78b4')
p <- ggtree(tree, layout='fan') +
geom_balance(node=tc.nn[1], fill=tc.colors[1], alpha=0.6) +
geom_balance(node=tc.nn[2], fill=tc.colors[4], alpha=0.6)
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
p <- annotate_balance(tree, 'n129', p=p, labels = c('n129+', 'n129-'),
offset.text=0.15, bar=FALSE)
p <- annotate_balance(tree, 'n401', p=p, labels = c('n401+', 'n401-'),
offset.text=0.15, bar=FALSE)
p
ggsave("Tree_balance_tadpole_dev_philr.pdf",width=8,height=8)
gp.philr.long <- convert_to_long(gp.philr, get_variable(MTDEV, 'tadpole')) %>%
filter(coord %in% top.coords)
ggplot(gp.philr.long, aes(x=labels, y=value)) +
geom_boxplot(fill='lightgrey') +
facet_grid(.~coord, scales='free_x') +
xlab('Tadpole') + ylab('Balance Value') +
theme_bw()
ggsave("Barplot_balance_tadpole_dev_philr.pdf",width=8,height=8)
library(tidyr); packageVersion('tidyr')
##
## Attaching package: 'tidyr'
## The following object is masked from 'package:ggtree':
##
## expand
## The following objects are masked from 'package:Matrix':
##
## expand, pack, unpack
## [1] '1.1.2'
gp.philr.long %>%
rename(Tadpole=labels) %>%
filter(coord %in% c('n129', 'n401')) %>%
spread(coord, value) %>%
ggplot(aes(x=n129, y=n401, color=Tadpole)) +
geom_point(size=4) +
xlab(tc.names['n129']) + ylab(tc.names['n401']) +
theme_bw()
###############################################################
#Identification of balances to discriminate adult/non-adult
# Following philr tutorial
sample_data(MTDEV)$adult<-factor(get_variable(MTDEV,"LifeStage") %in% c("Adult","Froglet"))
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
library(glmnet); packageVersion('glmnet')
## [1] '3.0.2'
glmmod <- glmnet(gp.philr, sample_data(MTDEV)$adult, alpha=1, family="binomial")
top.coords <- as.matrix(coefficients(glmmod, s=0.2526))
top.coords <- rownames(top.coords)[which(top.coords != 0)]
(top.coords <- top.coords[2:length(top.coords)]) # remove the intercept as a coordinate
## [1] "n94" "n124" "n415" "n416"
# Dénomination des balances
tc.names <- sapply(top.coords, function(x) name.balance(tree, tax, x))
tc.names
## n94
## "Domain_Bacteria/Domain_Bacteria"
## n124
## "Domain_Bacteria/Phylum_Firmicutes"
## n415
## "Species_Multi-affiliation/Species_unknown species"
## n416
## "Family_Peptostreptococcaceae/Family_Family XIII"
votes <- name.balance(tree, tax, 'n94', return.votes = c('up', 'down'))
votes[[c('up.votes', 'Family')]] # Numerator at Family Level
## votes
## Erysipelotrichaceae Mycoplasmataceae Spirochaetaceae unknown family
## 17 1 1 8
votes[[c('down.votes', 'Family')]] # Denominator at Family Level
## votes
## Acidaminococcaceae Aeromonadaceae
## 1 1
## Alcaligenaceae Bradyrhizobiaceae
## 1 3
## Burkholderiaceae Caulobacteraceae
## 4 1
## Cellvibrionaceae Christensenellaceae
## 1 4
## Chromatiaceae Chthoniobacterales Incertae Sedis
## 5 1
## Clostridiaceae 1 Clostridiales vadinBB60 group
## 20 2
## Comamonadaceae Coriobacteriaceae
## 18 1
## Deferribacteraceae Dermatophilaceae
## 1 1
## Desulfovibrionaceae Enterobacteriaceae
## 25 3
## Enterococcaceae Erysipelotrichaceae
## 3 1
## Erythrobacteraceae Eubacteriaceae
## 1 2
## Family XII Family XIII
## 2 10
## Fibrobacteraceae Halomonadaceae
## 1 1
## Herpetosiphonaceae Hyphomicrobiaceae
## 1 1
## Lachnospiraceae Methylobacteriaceae
## 45 1
## Methylocystaceae Microbacteriaceae
## 2 10
## Micrococcaceae Moraxellaceae
## 2 4
## Multi-affiliation Mycobacteriaceae
## 10 4
## Neisseriaceae Nitrosomonadaceae
## 3 1
## Nitrospiraceae OPB56
## 1 1
## Oxalobacteraceae Peptostreptococcaceae
## 2 3
## Planococcaceae Pseudomonadaceae
## 3 8
## Rhizobiaceae Rhodobacteraceae
## 2 9
## Rhodocyclaceae Rhodospirillaceae
## 8 13
## Rhodospirillales Incertae Sedis Rickettsiales Incertae Sedis
## 1 1
## Ruminococcaceae Shewanellaceae
## 76 1
## Sphingomonadaceae Streptococcaceae
## 6 2
## Synergistaceae unknown family
## 2 4
## Verrucomicrobiaceae Xanthomonadaceae
## 7 4
votes <- name.balance(tree, tax, 'n124', return.votes = c('up', 'down'))
votes[[c('up.votes', 'Family')]] # Numerator at Family Level
## votes
## Aeromonadaceae Alcaligenaceae
## 1 1
## Bradyrhizobiaceae Burkholderiaceae
## 3 4
## Caulobacteraceae Cellvibrionaceae
## 1 1
## Chromatiaceae Chthoniobacterales Incertae Sedis
## 5 1
## Comamonadaceae Coriobacteriaceae
## 18 1
## Dermatophilaceae Desulfovibrionaceae
## 1 25
## Enterobacteriaceae Erythrobacteraceae
## 3 1
## Fibrobacteraceae Halomonadaceae
## 1 1
## Herpetosiphonaceae Hyphomicrobiaceae
## 1 1
## Methylobacteriaceae Methylocystaceae
## 1 2
## Microbacteriaceae Micrococcaceae
## 10 2
## Moraxellaceae Multi-affiliation
## 4 8
## Mycobacteriaceae Neisseriaceae
## 4 3
## Nitrosomonadaceae Nitrospiraceae
## 1 1
## OPB56 Oxalobacteraceae
## 1 2
## Pseudomonadaceae Rhizobiaceae
## 8 2
## Rhodobacteraceae Rhodocyclaceae
## 9 8
## Rhodospirillaceae Rhodospirillales Incertae Sedis
## 13 1
## Rickettsiales Incertae Sedis Ruminococcaceae
## 1 1
## Shewanellaceae Sphingomonadaceae
## 1 6
## unknown family Verrucomicrobiaceae
## 4 7
## Xanthomonadaceae
## 4
votes[[c('down.votes', 'Family')]] # Denominator at Family Level
## votes
## Acidaminococcaceae Christensenellaceae
## 1 4
## Clostridiaceae 1 Clostridiales vadinBB60 group
## 20 2
## Enterococcaceae Erysipelotrichaceae
## 3 1
## Eubacteriaceae Family XII
## 2 2
## Family XIII Lachnospiraceae
## 10 45
## Multi-affiliation Peptostreptococcaceae
## 2 3
## Planococcaceae Ruminococcaceae
## 3 75
## Streptococcaceae
## 2
votes <- name.balance(tree, tax, 'n415', return.votes = c('up', 'down'))
votes[[c('up.votes', 'Family')]] # Numerator at Family Level
## votes
## Clostridiaceae 1
## 1
votes[[c('down.votes', 'Family')]] # Denominator at Family Level
## votes
## Clostridiaceae 1
## 1
votes <- name.balance(tree, tax, 'n416', return.votes = c('up', 'down'))
votes[[c('up.votes', 'Family')]] # Numerator at Family Level
## votes
## Peptostreptococcaceae
## 3
votes[[c('down.votes', 'Family')]] # Denominator at Family Level
## votes
## Family XIII
## 10
# Visualisation des balances
library(ggtree); packageVersion("ggtree")
## [1] '2.2.4'
library(dplyr); packageVersion('dplyr')
## [1] '1.0.2'
tc.nn <- name.to.nn(tree, top.coords)
tc.colors <- c('#a6cee3', '#1f78b4','#b2df8a', '#33a02c')
p <- ggtree(tree, layout='fan') +
geom_balance(node=tc.nn[1], fill=tc.colors[1], alpha=0.6) +
geom_balance(node=tc.nn[2], fill=tc.colors[2], alpha=0.6) +
geom_balance(node=tc.nn[3], fill=tc.colors[3], alpha=0.6) +
geom_balance(node=tc.nn[4], fill=tc.colors[4], alpha=0.6)
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
p <- annotate_balance(tree, 'n94', p=p, labels = c('n94+', 'n94-'),
offset.text=0.15, bar=FALSE)
p <- annotate_balance(tree, 'n124', p=p, labels = c('n124+', 'n124-'),
offset.text=0.15, bar=FALSE)
p <- annotate_balance(tree, 'n415', p=p, labels = c('n415+', 'n415-'),
offset.text=0.15, bar=FALSE)
p <- annotate_balance(tree, 'n416', p=p, labels = c('n416+', 'n416-'),
offset.text=0.15, bar=FALSE)
p
ggsave("Tree_balance_adult_dev_philr.pdf",width=8,height=8)
gp.philr.long <- convert_to_long(gp.philr, get_variable(MTDEV, 'adult')) %>%
filter(coord %in% top.coords)
ggplot(gp.philr.long, aes(x=labels, y=value)) +
geom_boxplot(fill='lightgrey') +
facet_grid(.~coord, scales='free_x') +
xlab('Adult') + ylab('Balance Value') +
theme_bw()
ggsave("Barplot_balance_adult_dev_philr.pdf",width=8,height=8)
library(tidyr); packageVersion('tidyr')
## [1] '1.1.2'
gp.philr.long %>%
rename(Adult=labels) %>%
filter(coord %in% c('n124', 'n94')) %>%
spread(coord, value) %>%
ggplot(aes(x=n124, y=n94, color=Adult)) +
geom_point(size=4) +
xlab(tc.names['n124']) + ylab(tc.names['n94']) +
theme_bw()