METATETARD

Differential abundance analysis of OTUs

on tadpole’s guts according to diet

on 16S rRNA RNA data

# Messages and warnings turned off for rendering
  
#Analysis using Deseq2 and phyloseq
library(phyloseq)
library(DESeq2)
library(ggplot2)
library(phytools)
library(dplyr)
# Warnings turned off for rendering
## Folders, Themes, colors
source("prelude.R")

# Setting up directories
data_dir<-paste0(data_dir_path,"xpdiet")
output_dir<-paste0(output_dir_path,"Figure5_diet")


# Importing data
# Defining the directory containing the data to import
setwd(data_dir)

# Importing abundance data
comm<-read.table("nmt3_abondance_xpfood_tpn_notax.csv",header=TRUE,row.names=1,sep=";")
# Getting rid of OTUs with zero abundance
comm<-comm %>% select_if((function(col) is.numeric(col) && sum(col) > 0))
comm<-t(comm)
#Importing metadata
metadata<-read.csv("nmt3_2_metadata_food.csv",sep=";",header=TRUE,row.names=1)
#Importing taxonomy
taxmat<-read.csv("nmt3_Gal18_abondance_tax.csv",sep=";",header=TRUE,row.names=1)
taxmat<-as.matrix(taxmat)
#To build the phyloseq data set
OTU<-otu_table(comm, taxa_are_rows = TRUE)
TAX<-tax_table(taxmat)
sample_data<-sample_data(metadata)
#Importing phylogeny
phy<-read.tree("nmt3_sequences_mpr.tree")
#Rooting the tree using midpoint rooting
phy<-midpoint.root(phy)
physeq.ini = merge_phyloseq(OTU,sample_data,TAX,phy)

# We keep OTUs seen more than 100 times in 20% of samples
physeq.ini.f<-filter_taxa(physeq.ini, function(x) sum(x > 10) >= (0.2*length(x)), TRUE)
# How many OTUs were found ?
length(row.names(otu_table(physeq.ini)))
## [1] 627
# How many OTUs have been filtered ?
length(row.names(otu_table(physeq.ini.f)))
## [1] 243
# Il y a 243 OTUs sur 627 qui passent le filtre d'abondance et de prévalence.

# Performing the differential analysis using DESeq2
diagdds <- phyloseq_to_deseq2(physeq.ini.f, ~ Class)
## converting counts to integer mode
##   Note: levels of factors in the design contain characters other than
##   letters, numbers, '_' and '.'. It is recommended (but not required) to use
##   only letters, numbers, and delimiters '_' or '.', as these are safe characters
##   for column names in R. [This is a message, not a warning or an error]
diagdds <- DESeq(diagdds, test="Wald", fitType="local")
## estimating size factors
##   Note: levels of factors in the design contain characters other than
##   letters, numbers, '_' and '.'. It is recommended (but not required) to use
##   only letters, numbers, and delimiters '_' or '.', as these are safe characters
##   for column names in R. [This is a message, not a warning or an error]
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
##   Note: levels of factors in the design contain characters other than
##   letters, numbers, '_' and '.'. It is recommended (but not required) to use
##   only letters, numbers, and delimiters '_' or '.', as these are safe characters
##   for column names in R. [This is a message, not a warning or an error]
## final dispersion estimates
##   Note: levels of factors in the design contain characters other than
##   letters, numbers, '_' and '.'. It is recommended (but not required) to use
##   only letters, numbers, and delimiters '_' or '.', as these are safe characters
##   for column names in R. [This is a message, not a warning or an error]
## fitting model and testing
##   Note: levels of factors in the design contain characters other than
##   letters, numbers, '_' and '.'. It is recommended (but not required) to use
##   only letters, numbers, and delimiters '_' or '.', as these are safe characters
##   for column names in R. [This is a message, not a warning or an error]
## -- replacing outliers and refitting for 10 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
##   Note: levels of factors in the design contain characters other than
##   letters, numbers, '_' and '.'. It is recommended (but not required) to use
##   only letters, numbers, and delimiters '_' or '.', as these are safe characters
##   for column names in R. [This is a message, not a warning or an error]
## fitting model and testing
##   Note: levels of factors in the design contain characters other than
##   letters, numbers, '_' and '.'. It is recommended (but not required) to use
##   only letters, numbers, and delimiters '_' or '.', as these are safe characters
##   for column names in R. [This is a message, not a warning or an error]
res <- results(diagdds, cooksCutoff = FALSE)
alpha <- 0.01
sigtab <- res[which(res$padj < alpha), ]
sigtab <- cbind(as(sigtab, "data.frame"), as(tax_table(physeq.ini)[rownames(sigtab), ], "matrix"))
sigtab <- sigtab %>% tibble::rownames_to_column(var="OTU_ID")
head(sigtab)
##        OTU_ID baseMean log2FoldChange     lfcSE      stat       pvalue
## 1 Cluster_686 11.68072       5.211772 1.0020760  5.200975 1.982453e-07
## 2 Cluster_148 55.66419      -9.104689 2.1472060 -4.240249 2.232716e-05
## 3 Cluster_175 33.90601       7.557872 0.9826646  7.691202 1.457590e-14
## 4 Cluster_184 44.74446      -4.348354 1.0100254 -4.305192 1.668407e-05
## 5 Cluster_364 33.64367       8.815808 1.0698899  8.239921 1.723250e-16
## 6 Cluster_522 18.00732       6.908281 1.0783529  6.406327 1.490674e-10
##           padj   Domain        Phylum            Class              Order
## 1 1.120317e-06 Bacteria Bacteroidetes       Cytophagia       Cytophagales
## 2 9.195763e-05 Bacteria Bacteroidetes Sphingobacteriia Sphingobacteriales
## 3 4.427430e-13 Bacteria Bacteroidetes Sphingobacteriia Sphingobacteriales
## 4 6.990048e-05 Bacteria Bacteroidetes Sphingobacteriia Sphingobacteriales
## 5 1.046874e-14 Bacteria Bacteroidetes Sphingobacteriia Sphingobacteriales
## 6 1.665436e-09 Bacteria Bacteroidetes Sphingobacteriia Sphingobacteriales
##                Family             Genus           Species
## 1       Cytophagaceae    Sporocytophaga    unknownspecies
## 2    Chitinophagaceae Sediminibacterium Multi-affiliation
## 3      Saprospiraceae      unknowngenus    unknownspecies
## 4 Sphingobacteriaceae        Pedobacter    unknownspecies
## 5  NS11-12marinegroup      unknowngenus    unknownspecies
## 6  NS11-12marinegroup      unknowngenus    unknownspecies
# Defining the output directory
setwd(output_dir)

# Phylum order
x <- tapply(sigtab$log2FoldChange, sigtab$Phylum, function(x) max(x))
x <- sort(x, TRUE)
sigtab$Phylum <- factor(as.character(sigtab$Phylum), levels=names(x))
ggplot(sigtab, aes(y=Phylum, x=log2FoldChange, color=Phylum)) + geom_point(size=4,colour="black")+geom_point(size=3,alpha=0.7)+theme(axis.text.y = element_text(size=rel(0.8)))+scale_colour_manual(values=jcolors)+theme_npgray()

# Family order
ggplot(sigtab, aes(y=Family, x=log2FoldChange, color=Phylum)) + geom_point(size=4,colour="black")+geom_point(size=3,alpha=0.7)+theme(axis.text.y = element_text(size=rel(0.8)))+scale_colour_manual(values=jcolors)+theme_npgray()+facet_grid(~Phylum)+guides(colour=FALSE)

ggsave("plot_deseq_diet_by_family.pdf",width=12,height=9)


# Genus order
x <- tapply(sigtab$log2FoldChange, sigtab$Genus, function(x) max(x))
x <- sort(x, TRUE)
sigtab$Genus <- factor(as.character(sigtab$Genus), levels=names(x))
ggplot(sigtab, aes(y=reorder(Genus,log2FoldChange), x=log2FoldChange, color=Phylum)) + geom_point(size=4,colour="black") + geom_point(size=3,alpha=0.7)+theme(axis.text.y = element_text(size=rel(0.8)))+scale_colour_manual(values=jcolors)+theme_npgray()+guides(colour=FALSE)

ggsave("plot_deseq_diet.pdf",width=12,height=14)

# Presentation using a combinaison of taxonomy and OTU ID
sigtab<-mutate(sigtab,otu2genus=paste(Family,Genus,OTU_ID,sep="_"))
ggplot(sigtab, aes(y=reorder(sigtab$otu2genus,log2FoldChange), x=log2FoldChange, color=Phylum)) + geom_point(size=4,colour="black") + geom_point(size=3,alpha=0.7)+theme(axis.text.y = element_text(size=rel(0.8)))+scale_colour_manual(values=jcolors)+theme_npgray()+guides(colour=FALSE)+ylab("OTUs")

ggsave("plot_deseq_diet_OTU2tax.pdf",width=14,height=14)

# Presentation using only OTU ID
ggplot(sigtab, aes(y=reorder(sigtab$OTU_ID,log2FoldChange), x=log2FoldChange, color=Phylum)) + geom_point(size=4,colour="black") + geom_point(size=3,alpha=0.7)+theme(axis.text.y = element_text(size=rel(0.8)))+scale_colour_manual(values=jcolors)+theme_npgray()+facet_grid(~Phylum)+guides(colour=FALSE)+ylab("OTUs")

ggsave("plot_deseq_diet_OTU.pdf",width=12,height=14)

# Selection of the differential OTUs
my_de_subset <- subset(otu_table(physeq.ini.f),rownames(otu_table(physeq.ini.f)) %in% sigtab$OTU_ID)
physeq.de.ini <- merge_phyloseq(my_de_subset, tax_table(physeq.ini.f), sample_data(physeq.ini.f),phy_tree(physeq.ini.f))

# Plotting the Heatmap using the differential OTUs
# Heatmap without clustering.
plot_heatmap(physeq.de.ini,sample.label="Class",low="#66CCFF", high="#000033", na.value="white")

ggsave("plot_heatmap_de_noclustering.pdf",height=12,width=12)

# Heatmap using unifrac PCoA for community composition
plot_heatmap(physeq.de.ini,method="PCoA",distance="unifrac",sample.label="Class",low="#66CCFF", high="#000033", na.value="white") + ylab("OTUs")+ theme_npgray()+ theme(axis.text.x = element_text(angle = 45, hjust = 1))+facet_grid(~Feed,scale="free_x", space="free_x")

ggsave("plot_heatmap_de_clustering.pdf",height=12,width=12)