METATETARD

Differential abundance analysis of OTUs

on developmental serie 16S rRNA GENE data

XPDEV

#Analysis using Deseq2 and phyloseq
library(ggplot2)
library(dplyr)
library(reshape2)
library(grid)
library(gridExtra)
library(ape)
library(scales)
library(phytools)
library(phyloseq)
library(DESeq2)
## Warnings turned off for rendering.
## Folders, Themes, colors
source("prelude.R")

# Setting up directories
data_dir<-paste0(data_dir_path,"xpdev")
output_dir<-paste0(output_dir_path,"Supp_Figure_3")

# Importing data
# Defining the directory containing the data to import
setwd(data_dir)

# Importing abundance data
comm<-read.table("nmt3_abondance_tpn_notax_XP_Dev.asc",header=TRUE,row.names=1)
# Getting rid of OTUs with zero abundance
comm<-comm %>% select_if((function(col) is.numeric(col) && sum(col) > 0))
comm<-t(comm)
#Importing metadata
metadata<-read.csv("nmt3_2_metadata_xp_dev.csv",sep=";",header=TRUE,row.names=1)
# To fix the order of life stages
metadata$Stage_sorted<-factor(metadata$LifeStage,levels=c("Premetamorph","Prometamorph","Metamorph","Froglet","Adult"))

taxmat<-read.csv("nmt3_Gal18_abondance_tax.csv",sep=";",header=TRUE,row.names=1)
taxmat<-as.matrix(taxmat)
OTU <- otu_table(comm, taxa_are_rows = TRUE)
TAX <- tax_table(taxmat)
sample_data = sample_data(metadata)
#Importing phylogeny
phy<-read.tree("nmt3_sequences_mpr.tree")
#Rooting the tree using midpoint rooting
phy<-midpoint.root(phy)
physeq.ini <- merge_phyloseq(OTU,sample_data,TAX,phy)

# We keep OTUs seen more than 100 times in 20% of samples
physeq.ini.f<-filter_taxa(physeq.ini, function(x) sum(x > 100) >= (0.2*length(x)), TRUE)

# How many OTUs were found ?
length(row.names(otu_table(physeq.ini)))
## [1] 666
# How many OTUs have been filtered ?
length(row.names(otu_table(physeq.ini.f)))
## [1] 35
# Il y a 35 OTUs sur 666 qui passent le filtre d'abondance et de prévalence.

# Performing the differential analysis using DESeq2
diagdds <- phyloseq_to_deseq2(physeq.ini.f, ~ Stage_sorted)
## converting counts to integer mode
diagdds <- DESeq(diagdds, test="Wald", fitType="local")
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 6 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
res <- results(diagdds, cooksCutoff = FALSE)
alpha <- 0.01
sigtab <- res[which(res$padj < alpha), ]
sigtab <- cbind(as(sigtab, "data.frame"), as(tax_table(physeq.ini)[rownames(sigtab), ], "matrix"))
sigtab <- sigtab %>% tibble::rownames_to_column(var="OTU_ID")
head(sigtab)
##        OTU_ID   baseMean log2FoldChange    lfcSE       stat       pvalue
## 1  Cluster_13  118.04896       7.173592 1.400432   5.122416 3.016460e-07
## 2  Cluster_47 1452.67560      -8.898606 1.450649  -6.134224 8.557572e-10
## 3 Cluster_153   62.24298      11.160995 1.344169   8.303267 1.012869e-16
## 4 Cluster_144 1052.82260       3.803587 1.194489   3.184278 1.451156e-03
## 5  Cluster_10  237.20490      13.325704 1.393890   9.560086 1.176589e-21
## 6   Cluster_2 9249.15138     -15.530616 1.229463 -12.632033 1.405985e-36
##           padj   Domain         Phylum            Class              Order
## 1 7.038406e-07 Bacteria  Bacteroidetes      Bacteroidia      Bacteroidales
## 2 2.303962e-09 Bacteria  Bacteroidetes      Bacteroidia      Bacteroidales
## 3 8.862607e-16 Bacteria     Firmicutes Erysipelotrichia Erysipelotrichales
## 4 2.308657e-03 Bacteria     Firmicutes Erysipelotrichia Erysipelotrichales
## 5 1.372687e-20 Bacteria  Synergistetes      Synergistia      Synergistales
## 6 4.920946e-35 Bacteria Actinobacteria   Actinobacteria      Micrococcales
##                Family                         Genus
## 1  Porphyromonadaceae               Parabacteroides
## 2      Bacteroidaceae                   Bacteroides
## 3 Erysipelotrichaceae [Anaerorhabdus] furcosa group
## 4 Erysipelotrichaceae                  Faecalitalea
## 5      Synergistaceae                 unknown genus
## 6   Microbacteriaceae                     Leifsonia
##                              Species
## 1                  Multi-affiliation
## 2                    unknown species
## 3                    unknown species
## 4                    unknown species
## 5                    unknown species
## 6 Microbacteriaceae bacterium MAEY21
# Defining the output directory
setwd(output_dir)

# Phylum order
x <- tapply(sigtab$log2FoldChange, sigtab$Phylum, function(x) max(x))
x <- sort(x, TRUE)
sigtab$Phylum <- factor(as.character(sigtab$Phylum), levels=names(x))
ggplot(sigtab, aes(y=Phylum, x=log2FoldChange, color=Phylum)) + geom_point(size=4,colour="black")+geom_point(size=3,alpha=0.7)+theme(axis.text.y = element_text(size=rel(0.8)))+scale_colour_manual(values=jcolors)+theme_npgray()

# Family order
ggplot(sigtab, aes(y=Family, x=log2FoldChange, color=Phylum)) + geom_point(size=4,colour="black")+geom_point(size=3,alpha=0.7)+theme(axis.text.y = element_text(size=rel(0.8)))+scale_colour_manual(values=jcolors)+theme_npgray()+facet_grid(~Phylum)+guides(colour=FALSE)+ylab("Family")

ggsave("plot_deseq_stage_by_family.pdf",width=12,height=9)


# Genus order
x <- tapply(sigtab$log2FoldChange, sigtab$Genus, function(x) max(x))
x <- sort(x, TRUE)
sigtab$Genus <- factor(as.character(sigtab$Genus), levels=names(x))
ggplot(sigtab, aes(y=reorder(Genus,log2FoldChange), x=log2FoldChange, color=Phylum)) + geom_point(size=4,colour="black") + geom_point(size=3,alpha=0.7)+theme(axis.text.y = element_text(size=rel(0.8)))+scale_colour_manual(values=jcolors)+theme_npgray()+facet_grid(~Phylum)+guides(colour=FALSE)+ylab("Genus")

ggsave("plot_deseq_stage.pdf",width=12,height=9)

# Presentation using a combinaison of taxonomy and OTU ID
sigtab<-mutate(sigtab,otu2genus=paste(Family,Genus,OTU_ID,sep="_"))
ggplot(sigtab, aes(y=reorder(sigtab$otu2genus,log2FoldChange), x=log2FoldChange, color=Phylum)) + geom_point(size=4,colour="black") + geom_point(size=3,alpha=0.7)+theme(axis.text.y = element_text(size=rel(0.8)))+scale_colour_manual(values=jcolors)+theme_npgray()+facet_grid(~Phylum)+guides(colour=FALSE)+ylab("OTUs")

ggsave("plot_deseq_stage_OTU2tax.pdf",width=14,height=9)

# Presentation using only OTU ID
ggplot(sigtab, aes(y=reorder(OTU_ID,log2FoldChange), x=log2FoldChange, color=Phylum)) + geom_point(size=4,colour="black") + geom_point(size=3,alpha=0.7)+theme(axis.text.y = element_text(size=rel(0.8)))+scale_colour_manual(values=jcolors)+theme_npgray()+facet_grid(~Phylum)+guides(colour=FALSE)+ylab("OTUs")

ggsave("plot_deseq_stage_OTU.pdf",width=12,height=9)

# Selection of the differential OTUs
my_de_subset <- subset(otu_table(physeq.ini.f),rownames(otu_table(physeq.ini.f)) %in% sigtab$OTU_ID)
physeq.de.ini <- merge_phyloseq(my_de_subset, tax_table(physeq.ini.f), sample_data(physeq.ini.f),phy_tree(physeq.ini.f))



# Plotting the Heatmap using the differential OTUs
# Heatmap without clustering. We sort sample names according to developmental stages (automatic sorting is difficult because of the adult stage being non numeric
life_stage_order<-c("XT1A4G","XT1A5G","XT1B2G","XT1B2Gbis","XT1B4G","XT1B5G","XT1A2G","XT1B1G","XTLS13","XTLS14","XTLS15","XTLS16","XTLS17","XTLS12","XTLS2","XTLS3","XTLS4","XTLS5","XTLS6","XTLS1","XTLS10","XTLS11","XTLS7","XTLS8","XTLS9","XTLSM1","XTLSM2","XTLSM3","XTLTW2M","XTLTW3F","XTLTW3M","XTLTW4F","XTLTW5F")
life_stage_label<-c("NF55","NF55","NF55","NF55","NF55","NF55","NF55","NF56","NF57","NF58","NF58","NF58","NF58","NF61","NF62","NF62","NF62","NF62","NF62","NF66","NF66","NF66","NF66","NF66","NF66","Adult","Adult","Adult","Adult","Adult","Adult","Adult","Adult")

# Heatmap without clustering by sorting samples according to life stages
#plot_heatmap(physeq.de.ini,sample.label="DevelopmentalStage",low="#66CCFF", high="#000033",na.value="white")+scale_x_discrete(limits=life_stage_order,labels=life_stage_label)+facet_grid(~Stage_sorted,scale="free_x", space="free_x")
#ggsave("plot_heatmap_de_noclustering.pdf",height=6,width=12)

# Heatmap using unifrac PCoA for community composition
plot_heatmap(physeq.de.ini,method="PCoA",distance="unifrac",sample.label="DevelopmentalStage",low="#66CCFF", high="#000033", na.value="white") + ylab("OTUs")+ theme_npgray()+ theme(axis.text.x = element_text(angle = 45, hjust = 1))+facet_grid(~Stage_sorted,scale="free_x", space="free_x")

ggsave("plot_heatmap_de_clustering.pdf",height=6,width=12)



##################################
# Differential analysis          #
# Premetamorph ProMetamorph = PP #
##################################

resPP<-results(diagdds,cooksCutoff=FALSE,contrast=c('Stage_sorted','Premetamorph','Prometamorph'))
alpha <- 0.01
sigtabPP <- resPP[which(resPP$padj < alpha), ]
sigtabPP <- cbind(as(sigtabPP, "data.frame"), as(tax_table(physeq.ini.f)[rownames(sigtabPP), ], "matrix"))
# PP Phylum order
x <- tapply(sigtabPP$log2FoldChange, sigtabPP$Phylum, function(x) max(x))
x <- sort(x, TRUE)
sigtabPP$Phylum <- factor(as.character(sigtabPP$Phylum), levels=names(x))

# PP Genus order
x <- tapply(sigtabPP$log2FoldChange, sigtabPP$Genus, function(x) max(x))
x <- sort(x, TRUE)
sigtabPP$Genus <- factor(as.character(sigtabPP$Genus), levels=names(x))
ggplot(sigtabPP, aes(y=Genus, x=log2FoldChange, color=Phylum)) + geom_point(size=3,alpha=0.7)+theme(axis.text.y = element_text(size=rel(0.8)))+scale_colour_manual(values=jcolors)+facet_grid(~Phylum)+guides(colour=FALSE)+ggtitle(label="Premetamorph vs Prometamorph")

# PP Figure
pPP<-ggplot(sigtabPP, aes(y=Genus, x=log2FoldChange, colour=Phylum)) + geom_point(size=4,alpha=0.7)+theme(axis.text.y = element_text(size=rel(0.8)))+scale_colour_manual(values=jcolors)+theme_npgray()+theme(legend.position=c(0.8,0.8),legend.background=element_rect(fill="transparent",colour=NA))+ggtitle("Premetamorph vs Prometamorph")
#pPP
#ggsave("plot_deseq_PPstage.pdf",width=10,height=10)

############################################
# Differential Premetamorph Metamorph = TM #
############################################

resTM<-results(diagdds,cooksCutoff=FALSE,contrast=c('Stage_sorted','Premetamorph','Metamorph'))
alpha <- 0.01
sigtabTM <- resTM[which(resTM$padj < alpha), ]
sigtabTM <- cbind(as(sigtabTM, "data.frame"), as(tax_table(physeq.ini.f)[rownames(sigtabTM), ], "matrix"))

# TM Phylum order
x <- tapply(sigtabTM$log2FoldChange, sigtabTM$Phylum, function(x) max(x))
x <- sort(x, TRUE)
sigtabTM$Phylum <- factor(as.character(sigtabTM$Phylum), levels=names(x))

# TM Genus order
x <- tapply(sigtabTM$log2FoldChange, sigtabTM$Genus, function(x) max(x))
x <- sort(x, TRUE)
sigtabTM$Genus <- factor(as.character(sigtabTM$Genus), levels=names(x))
ggplot(sigtabTM, aes(y=Genus, x=log2FoldChange, color=Phylum)) + geom_point(size=3,alpha=0.7)+theme(axis.text.y = element_text(size=rel(0.8)))+scale_colour_manual(values=jcolors)+facet_grid(~Phylum)+guides(colour=FALSE)+ggtitle("Premetamorph vs Metamorph")

# TM Figure
pTM<-ggplot(sigtabTM, aes(y=Genus, x=log2FoldChange, colour=Phylum)) + geom_point(size=4,alpha=0.7)+theme(axis.text.y = element_text(size=rel(0.8)))+scale_colour_manual(values=jcolors)+theme_npgray() +theme(legend.position=c(0.8,0.8),legend.background=element_rect(fill="transparent",colour=NA))+ggtitle("Premetamorph vs Metamorph")
#pTM
#ggsave("plot_deseq_TMstage.pdf",width=10,height=10)

########################################
# Differential Premetamorph Adult = PA #
########################################

resPA<-results(diagdds,cooksCutoff=FALSE,contrast=c('Stage_sorted','Premetamorph','Adult'))
alpha <- 0.01
sigtabPA <- resPA[which(resPA$padj < alpha), ]
sigtabPA <- cbind(as(sigtabPA, "data.frame"), as(tax_table(physeq.ini.f)[rownames(sigtabPA), ], "matrix"))

# PA Phylum order
x <- tapply(sigtabPA$log2FoldChange, sigtabPA$Phylum, function(x) max(x))
x <- sort(x, TRUE)
sigtabPA$Phylum <- factor(as.character(sigtabPA$Phylum), levels=names(x))

# PA Genus order
x <- tapply(sigtabPA$log2FoldChange, sigtabPA$Genus, function(x) max(x))
x <- sort(x, TRUE)
sigtabPA$Genus <- factor(as.character(sigtabPA$Genus), levels=names(x))
ggplot(sigtabPA, aes(y=Genus, x=log2FoldChange, colour=Phylum)) + geom_point(size=3,alpha=0.7)+theme(axis.text.y = element_text(size=rel(0.8)))+scale_colour_manual(values=jcolors)+facet_grid(~Phylum)+guides(colour=FALSE)+ggtitle("Premetamorph vs Adult")+facet_grid(~Phylum)+guides(colour=FALSE)+ggtitle("Premetamorph vs Adult")

# PA Figure
pPA<-ggplot(sigtabPA, aes(y=Genus, x=log2FoldChange, colour=Phylum)) + geom_point(size=4,alpha=0.7)+theme(axis.text.y = element_text(size=rel(0.8)))+scale_colour_manual(values=jcolors)+theme_npgray() +theme(legend.position=c(0.8,0.8),legend.background=element_rect(fill="transparent",colour=NA))+ggtitle("Premetamorph vs Adult")
#pPA
#ggsave("plot_deseq_PA_stage.pdf",width=10,height=10)


#############################################
# Differential Prometamorph Metamorph = PRM #
#############################################

resPRM<-results(diagdds,cooksCutoff=FALSE,contrast=c('Stage_sorted','Prometamorph','Metamorph'))
alpha <- 0.01
sigtabPRM <- resPRM[which(resPRM$padj < alpha), ]
sigtabPRM <- cbind(as(sigtabPRM, "data.frame"), as(tax_table(physeq.ini.f)[rownames(sigtabPRM), ], "matrix"))
# PRM Phylum order
x <- tapply(sigtabPRM$log2FoldChange, sigtabPRM$Phylum, function(x) max(x))
x <- sort(x, TRUE)
sigtabPRM$Phylum<- factor(as.character(sigtabPRM$Phylum), levels=names(x))

# PRM Genus order
x <- tapply(sigtabPRM$log2FoldChange, sigtabPRM$Genus, function(x) max(x))
x <- sort(x, TRUE)
sigtabPRM$Genus <- factor(as.character(sigtabPRM$Genus), levels=names(x))
ggplot(sigtabPRM, aes(y=Genus, x=log2FoldChange, color=Phylum)) + geom_point(size=3,alpha=0.7)+theme(axis.text.y = element_text(size=rel(0.8)))+scale_colour_manual(values=jcolors)+facet_grid(~Phylum)+guides(colour=FALSE)+ggtitle("Prometamorph vs Metamorph")

# PRM Figure
pPRM<-ggplot(sigtabPRM, aes(y=Genus, x=log2FoldChange, colour=Phylum)) + geom_point(size=4,alpha=0.7)+theme(axis.text.y = element_text(size=rel(0.8)))+scale_colour_manual(values=jcolors)+theme_npgray() +theme(legend.position=c(0.8,0.8),legend.background=element_rect(fill="transparent",colour=NA))+ggtitle("Prometamorph vs Metamorph")
#pPRM
#ggsave("plot_deseq_PRM_stage.pdf",width=10,height=10)


#########################################
# Differential Prometamorph Adult = PRA #
#########################################

resPRA<-results(diagdds,cooksCutoff=FALSE,contrast=c('Stage_sorted','Prometamorph','Adult'))
alpha <- 0.01
sigtabPRA <- resPRA[which(resPRA$padj < alpha), ]
sigtabPRA <- cbind(as(sigtabPRA, "data.frame"), as(tax_table(physeq.ini.f)[rownames(sigtabPRA), ], "matrix"))
# PRA Phylum order
x <- tapply(sigtabPRA$log2FoldChange, sigtabPRA$Phylum, function(x) max(x))
x <- sort(x, TRUE)
sigtabPRA$Phylum <- factor(as.character(sigtabPRA$Phylum), levels=names(x))

# PRA Genus order
x <- tapply(sigtabPRA$log2FoldChange, sigtabPRA$Genus, function(x) max(x))
x <- sort(x, TRUE)
sigtabPRA$Genus <- factor(as.character(sigtabPRA$Genus), levels=names(x))
ggplot(sigtabPRA, aes(y=Genus, x=log2FoldChange, color=Phylum)) + geom_point(size=3,alpha=0.7)+theme(axis.text.y = element_text(size=rel(0.8)))+scale_colour_manual(values=jcolors)+facet_grid(~Phylum)+guides(colour=FALSE)+ggtitle("Prometamorph vs Adult")

# PRA Figure
pPRA<-ggplot(sigtabPRA, aes(y=Genus, x=log2FoldChange, colour=Phylum)) + geom_point(size=4,alpha=0.7)+theme(axis.text.y = element_text(size=rel(0.8)))+scale_colour_manual(values=jcolors)+theme_npgray() +theme(legend.position=c(0.8,0.8),legend.background=element_rect(fill="transparent",colour=NA))+ggtitle("Prometamorph vs Adult")
#pPRA
#ggsave("plot_deseq_PRA_stage.pdf",width=10,height=10)



#####################################
# Differential Metamorph Adult = MA #
#####################################

resMA<-results(diagdds,cooksCutoff=FALSE,contrast=c('Stage_sorted','Metamorph','Adult'))
alpha <- 0.01
sigtabMA <- resMA[which(resMA$padj < alpha), ]
sigtabMA <- cbind(as(sigtabMA, "data.frame"), as(tax_table(physeq.ini.f)[rownames(sigtabMA), ], "matrix"))

# MA Phylum order
x <- tapply(sigtabMA$log2FoldChange, sigtabMA$Phylum, function(x) max(x))
x <- sort(x, TRUE)
sigtabMA$Phylum <- factor(as.character(sigtabMA$Phylum), levels=names(x))

# MA Genus order
x <- tapply(sigtabMA$log2FoldChange, sigtabMA$Genus, function(x) max(x))
x <- sort(x, TRUE)
sigtabMA$Genus <- factor(as.character(sigtabMA$Genus), levels=names(x))
ggplot(sigtabMA, aes(y=Genus, x=log2FoldChange, color=Phylum)) + geom_point(size=3,alpha=0.7)+theme(axis.text.y = element_text(size=rel(0.8)))+scale_colour_manual(values=jcolors)+facet_grid(~Phylum)+guides(colour=FALSE)+ggtitle("Metamorph vs Adult")

#Figure
pMA<-ggplot(sigtabMA, aes(y=Genus, x=log2FoldChange, colour=Phylum)) + geom_point(size=4,alpha=0.7)+theme(axis.text.y = element_text(size=rel(0.8)))+scale_colour_manual(values=jcolors)+theme_npgray() +theme(legend.position=c(0.8,0.8),legend.background=element_rect(fill="transparent",colour=NA))+ggtitle("Metamorph vs Adult")
#pMA
#ggsave("plot_deseq_MA_stage.pdf",width=10,height=10)


pPP<-pPP+guides(colour=FALSE)
pTM<-pTM+guides(colour=FALSE)
pPA<-pPA+guides(colour=FALSE)
pPRM<-pPRM+guides(colour=FALSE)
pPRA<-pPRA+guides(colour=FALSE)
pMA<-pMA+guides(colour=FALSE)


#pdf("plot_deseq_stageTMA.pdf",width=20,height=12)
#print(grid.arrange(pPP,pTM,pPA,pPRM,pPRA,pMA, ncol=3))
#dev.off()


#############################################
# Look at some specific class or phyla      #
# we can work on raw data                   #
# or on filtered data                       #
#  physeq.ini.g corresponds to              #
# raw abundance count data                  #
#############################################

physeq.rel <- microbiome::transform(physeq.ini, "compositional")
# Selecting differential OTUs
my_de_subset<-subset(otu_table(physeq.rel),rownames(otu_table(physeq.rel)) %in% sigtab$OTU_ID)
physeq.de.rel <- merge_phyloseq(my_de_subset, tax_table(physeq.rel), sample_data(physeq.rel),phy_tree(physeq.rel))
physeq.de.rel.g<-tax_glom(physeq.de.rel, taxrank="Genus")

# Phylogenetic tree of all OTUs identified using deseq2
plot_tree(physeq.de.rel.g, color="Stage_sorted", label.tips="Genus", size="abundance", plot.margin=0.3,text.size=3)

# Firmicutes are most often found in tadpole and adult stages with some exceptions
firmicutes<- subset_taxa(physeq.de.rel.g, Phylum=="Firmicutes")
pfirmicutes<-plot_tree(firmicutes, color="Stage_sorted", label.tips="Genus", size="abundance", plot.margin=0.3,text.size=4)
pfirmicutes

#pdf("plot_firmicutes_stage.pdf",width=15,height=20)
#print(pfirmicutes)
#dev.off()

#Bacteroidetes are most often found in adult samples
bacteroidetes<- subset_taxa(physeq.de.rel.g, Phylum=="Bacteroidetes")
pbacteroidetes<-plot_tree(bacteroidetes, color="Stage_sorted", label.tips="Genus", size="abundance", plot.margin=0.3,text.size=4)
pbacteroidetes

#pdf("plot_bacteroidetes_stage.pdf",width=15,height=20)
#print(pbacteroidetes)
#dev.off()

#Proteobacteria:
#Beta-proteobacteria are mostly found in adult samples
proteobacteria<- subset_taxa(physeq.de.rel.g, Phylum=="Proteobacteria")
pproteobacteria<-plot_tree(proteobacteria, color="Stage_sorted", label.tips="Genus", size="abundance", plot.margin=0.3,text.size=4)
pproteobacteria

#pdf("plot_proteobacteria_stage.pdf",width=15,height=20)
#print(pproteobacteria)
#dev.off()