METATETARD

Prevalence analysis of OTUs

on early tadpole 16S rRNA RNA data

# Message and warning turned off for rendering
# Loading packages
library(microbiome)
library(dplyr)
library(phytools)
library(knitr)
library(viridis)
library(RColorBrewer)
# Warning turned off for rendering
## Folders, Themes, colors
source("prelude.R")

# Setting up directories
data_dir <-paste0(data_dir_path,"xprna")
output_dir <- paste0(output_dir_path,"Figure4_rna")


#1 Importing data
# Defining the directory containing the data to import
setwd(data_dir)

## Importing abundance data
comm<-read.table("nmt3_abondance_early_tpn_notax.csv", header=TRUE, row.names=1,sep=";")
# Getting rid of OTUs with zero abundance
comm<-comm %>% select_if((function(col) is.numeric(col) && sum(col) > 0))
comm<-t(comm)
#Importing metadata
metadata<-read.csv("nmt3_metadata_rna.csv",sep=";",header=TRUE,row.names=1)
taxmat<-read.csv("nmt3_Gal18_abondance_tax.csv",sep=";",header=TRUE,row.names=1)
taxmat<-as.matrix(taxmat)
OTU = otu_table(comm, taxa_are_rows = TRUE)
TAX = tax_table(taxmat)
sample_data = sample_data(metadata)
#Importing phylogeny
phy<-read.tree("nmt3_sequences_mpr.tree")
#Rooting the tree using midpoint rooting
phy<-midpoint.root(phy)
physeq.ini = merge_phyloseq(OTU,sample_data,TAX,phy)

# Transformation in relative abundance data
physeq.rel <- microbiome::transform(physeq.ini, "compositional")


#############################################################################################################
# We determine the size of the core microbiome using a set of abundance and prevalence thresholds           #
# see Salonen et al. CMI, 2012                                                                               #
#############################################################################################################

# Core line plot
det <- c(0, 0.1, 0.5, 2, 5, 20)/100
prevalences <- seq(.05, 1, .05)
plot_core(physeq.rel, prevalences = prevalences, detections = det, plot.type = "lineplot") + xlab("Relative Abundance in %") + theme_npgray()

#################################################
# Core microbiome visualization using heatmaps  #
# Core microbiome definition  :                 #
# with 50% minimal prevalence and minimum       #
# detection threshold of 0.001 %                #
#################################################

########################################################################
#      50% minimal prevalence and minimum detection threshold of 0.001 #
########################################################################

physeq1.core <- core(physeq.rel, detection = 0.001, prevalence = .5)

#Fetching taxa names corresponding to the core microbiota OTUs
core.taxa <- taxa(physeq1.core)

#Fetching taxonomy  corresponding to the core microbiota OTUs
tax.mat <- tax_table(physeq1.core)
tax.df <- as.data.frame(tax.mat)

# Adding OTU identifiers
tax.df$OTU <- rownames(tax.df)

# select taxonomy of only
# those OTUs that are core memebers based on the thresholds that were used.
core.taxa.class <- dplyr::filter(tax.df, rownames(tax.df) %in% core.taxa)
knitr::kable(head(core.taxa.class))
Domain Phylum Class Order Family Genus Species OTU
Bacteria Bacteroidetes Cytophagia Cytophagales Cytophagaceae Flectobacillus Flectobacillusroseus Cluster_23
Bacteria Bacteroidetes Sphingobacteriia Sphingobacteriales Saprospiraceae unknowngenus Haliscomenobactersp. Cluster_146
Bacteria Bacteroidetes Flavobacteriia Flavobacteriales Flavobacteriaceae Flavobacterium Multi-affiliation Cluster_12
Bacteria Bacteroidetes Bacteroidia Bacteroidales Rikenellaceae dgA-11gutgroup unknownspecies Cluster_32
Bacteria Bacteroidetes Bacteroidia Bacteroidales Rikenellaceae Rikenella Rikenellamicrofusus Cluster_118
Bacteria Bacteroidetes Bacteroidia Bacteroidales Rikenellaceae Rikenella unknownspecies Cluster_230
# Saving a file containing the list of OTUs and their taxonomy
write.table(core.taxa.class,file="core1_taxa_otu.csv",sep=";",row.names=TRUE,col.names=TRUE)

# Selection of core microbiota OTUs in the starting physeq.ini
my_subset <- subset(otu_table(physeq.ini), rownames(otu_table(physeq.ini)) %in% tax.df$OTU)
physeq.core.ini <- merge_phyloseq(my_subset, tax_table(physeq.ini), sample_data(physeq.ini),phy_tree(physeq.ini))

##########################
# Plotting the heat map  #
##########################

# Defining the output dir
setwd(output_dir)

prevalences <- seq(.05, 1, .05)
detections <- 10^seq(log10(1e-3), log10(.2), length = 20)

# I fetched the list of OTUs and their order from the pdf of plot_core
# Then I replace OTUs IDs by a more informative taxonomic name
# A partir du pdf du plot_core sans modification des légendes y (:-o) et je le renseigne dans le fichier core1_taxa_otu.xlsx pour remplacer les noms d'OTU par un nom taxonomique

plot_core(physeq.rel, plot.type = "heatmap", prevalences = prevalences, detections = detections, min.prevalence = .5,colours = rev(brewer.pal(5, "Spectral")),)+theme_npgray()+scale_x_discrete(breaks=detections,labels=c("0.001","","","","","","0.005","","","","","","","","0.05","","","","","0.20"),name="Detection threshold in % of relative abundance")+scale_y_discrete(labels=c("Alphaproteobacterium UT-4","Ruminococcaceae sp","Rikenella sp","Ruminococcaceae sp","Haliscomenobacter sp","Rikenella microfusus","Lachnospiraceae sp","Desulfovibrio sp","Massilia sp","Azovibrio sp","Anaerotruncus sp","Porphyromonadaceae sp","Pseudomonas sp","Bacteroides sp","Bacteroides sp","Sphingomonadaceae sp","Rikenellaceae sp","Rikenella sp","Parabacteroides sp","Bacteroides fragilis","Desulfovibrio sp","Peptostreptococcaceae sp","Bacteroides sp","Parabacteroides sp","Flectobacillus roseus","Rheinheimera sp","Flavobacterium sp","Parabacteroides sp","Synergistaceae sp","Desulfovibrio sp","Bacteroides sp","Alphaproteobacterium UT-4","Rikenella sp")
)+ theme(legend.text=element_text(size=10))

ggsave("core_heatmap_prev_05.pdf",width=10,height=10)

plot_heatmap(physeq.core.ini,sample.label="Description",low="#66CCFF", high="#000033", na.value="white")+scale_x_discrete(limits=c("SD-SL-PE","SD-SL-NF41","SD-SL-NF48","SD-SL-NF50","SD-SL-NF52","SD-SL-iNF52","SD-SL-iNF56","SD-SL-iNF60","SD-SL-iNF63","SD-SL-iNF66","SD-TGA-PE","SD-TGA-NF41","SD-TGA-NF48","SD-TGA-NF50","SD-TGA-NF52","SD-TGA-iNF52","SD-TGA-iNF56","SD-TGA-iNF60","SD-TGA-iNF63","SD-TGA-iNF66"))+theme_npgray()+ theme(axis.text.x = element_text(angle = 45, hjust = 1))
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.

ggsave("core_heatmap_abo_1.pdf",width=10,height=10)


# Same graph with OTUs in the same order as the prevalence plot, and grouped by developmental stages
plot_heatmap(physeq.core.ini,sample.label="Description",low="#66CCFF", high="#000033", na.value="white")+scale_x_discrete(limits=c("SD-SL-PE","SD-TGA-PE","SD-SL-NF41","SD-TGA-NF41","SD-SL-NF48","SD-TGA-NF48","SD-SL-NF50","SD-TGA-NF50","SD-SL-NF52","SD-TGA-NF52","SD-SL-iNF52","SD-TGA-iNF52","SD-SL-iNF56","SD-TGA-iNF56","SD-SL-iNF60","SD-TGA-iNF60","SD-SL-iNF63","SD-TGA-iNF63","SD-SL-iNF66","SD-TGA-iNF66"))+scale_y_discrete(limits=c("Cluster_423","Cluster_251","Cluster_368","Cluster_68","Cluster_146","Cluster_118","Cluster_51","Cluster_53","Cluster_46","Cluster_16","Cluster_76","Cluster_72","Cluster_42","Cluster_59","Cluster_26","Cluster_117","Cluster_32","Cluster_230","Cluster_102","Cluster_36","Cluster_7","Cluster_17","Cluster_29","Cluster_13","Cluster_23","Cluster_8","Cluster_12","Cluster_11","Cluster_10","Cluster_3","Cluster_5","Cluster_1","Cluster_4"))+theme_npgray()+ theme(axis.text.x = element_text(angle = 45, hjust = 1))
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.

ggsave("core_heatmap_abo_2.pdf",width=10,height=10)

# Same graph with OTUs changed to their taxonomy
plot_heatmap(physeq.core.ini,sample.label="Description",low="#66CCFF", high="#000033", na.value="white")+scale_x_discrete(limits=c("SD-SL-PE","SD-TGA-PE","SD-SL-NF41","SD-TGA-NF41","SD-SL-NF48","SD-TGA-NF48","SD-SL-NF50","SD-TGA-NF50","SD-SL-NF52","SD-TGA-NF52","SD-SL-iNF52","SD-TGA-iNF52","SD-SL-iNF56","SD-TGA-iNF56","SD-SL-iNF60","SD-TGA-iNF60","SD-SL-iNF63","SD-TGA-iNF63","SD-SL-iNF66","SD-TGA-iNF66"),labels=c("SL-PE","TGA-PE","SL-NF41","TGA-NF41","SL-NF48","TGA-NF48","SL-NF50","TGA-NF50","SL-NF52","TGA-NF52","SL-iNF52","TGA-iNF52","SL-iNF56","TGA-iNF56","SL-iNF60","TGA-iNF60","SL-iNF63","TGA-iNF63","SL-iNF66","TGA-iNF66"))+scale_y_discrete(limits=c("Cluster_423","Cluster_251","Cluster_368","Cluster_68","Cluster_146","Cluster_118","Cluster_51","Cluster_53","Cluster_46","Cluster_16","Cluster_76","Cluster_72","Cluster_42","Cluster_59","Cluster_26","Cluster_117","Cluster_32","Cluster_230","Cluster_102","Cluster_36","Cluster_7","Cluster_17","Cluster_29","Cluster_13","Cluster_23","Cluster_8","Cluster_12","Cluster_11","Cluster_10","Cluster_3","Cluster_5","Cluster_1","Cluster_4"),labels=c("Alphaproteobacterium UT-4","Ruminococcaceae sp","Rikenella sp","Ruminococcaceae sp","Haliscomenobacter sp","Rikenella microfusus","Lachnospiraceae sp","Desulfovibrio sp","Massilia sp","Azovibrio sp","Anaerotruncus sp","Porphyromonadaceae sp","Pseudomonas sp","Bacteroides sp","Bacteroides sp","Sphingomonadaceae sp","Rikenellaceae sp","Rikenella sp","Parabacteroides sp","Bacteroides fragilis","Desulfovibrio sp","Peptostreptococcaceae sp","Bacteroides sp","Parabacteroides sp","Flectobacillus roseus","Rheinheimera sp","Flavobacterium sp","Parabacteroides sp","Synergistaceae sp","Desulfovibrio sp","Bacteroides sp","Alphaproteobacterium UT-4","Rikenella sp"))+theme_npgray()+ theme(axis.text.x = element_text(angle = 45, hjust = 1))
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.

ggsave("core_heatmap_abo_3.pdf",width=10,height=10)




################################################################################
#   Core with 20% minimal prevalence and minimum detection threshold of 0.001  #
################################################################################
# Selection of core microbiota with 20% minimum prevalence
physeq2.core <- core(physeq.rel, detection = 0.001, prevalence = .2)

#Fetching taxa names corresponding to the core microbiota OTUs
core.taxa <- taxa(physeq2.core)

#Fetching taxonomy corresponding to the core microbiota OTUs
tax.mat <- tax_table(physeq2.core)
tax.df <- as.data.frame(tax.mat)

# Adding OTU identifiers
tax.df$OTU <- rownames(tax.df)

# select taxonomy of only
# those OTUs that are core memebers based on the thresholds that were used.
core.taxa.class <- dplyr::filter(tax.df, rownames(tax.df) %in% core.taxa)

prevalences <- seq(.05, 1, .05)
detections <- 10^seq(log10(1e-3), log10(.2), length = 20)


plot_core(physeq.rel,
    plot.type = "heatmap",
    prevalences = prevalences,
    detections = detections,
    colours = rev(brewer.pal(5, "Spectral")),
    min.prevalence = .2) + theme_npgray()+ scale_x_discrete(breaks=detections,labels=c("0.001","","","","","","0.005","","","","","","","","0.05","","","","","0.20"),name="Detection threshold in % of relative abundance")

ggsave("core_heatmap_prev_02.pdf",width=10,height=14)