#############################################
# METATETARD #
# Community analysis of OTUs #
# on developmental serie 16S rRNA GENE data #
# XPDEV #
#############################################
###################################################################
# Prevalence analysis on Xenopus developmental serie 16S DNA data #
###################################################################
library(microbiome)
## Loading required package: phyloseq
## Loading required package: ggplot2
##
## microbiome R package (microbiome.github.com)
##
##
##
## Copyright (C) 2011-2020 Leo Lahti,
## Sudarshan Shetty et al. <microbiome.github.io>
##
## Attaching package: 'microbiome'
## The following object is masked from 'package:ggplot2':
##
## alpha
## The following object is masked from 'package:base':
##
## transform
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(phytools)
## Loading required package: ape
## Loading required package: maps
library(viridis)
## Loading required package: viridisLite
library(RColorBrewer)
## Folders, Themes, colors
source("prelude.R")
# Setting up directories
data_dir <-paste0(data_dir_path,"xpdev")
output_dir <- paste0(output_dir_path,"Supp_Figure_2")
# Importing data
# Defining the directory containing the data to import
setwd(data_dir)
# Importing abundance data
comm<-read.table("nmt3_abondance_tpn_notax_XP_Dev.asc", header=TRUE, row.names=1)
# Getting rid of OTUs with zero abundance
comm<-comm %>% select_if((function(col) is.numeric(col) && sum(col) > 0))
comm<-t(comm)
# Importing metadata
metadata<-read.csv("nmt3_2_metadata_xp_dev.csv",sep=";",header=TRUE,row.names=1)
taxmat<-read.csv("nmt3_Gal18_abondance_tax.csv",sep=";",header=TRUE,row.names=1)
taxmat<-as.matrix(taxmat)
OTU = otu_table(comm, taxa_are_rows = TRUE)
TAX = tax_table(taxmat)
sample_data = sample_data(metadata)
# Importing phylogeny
phy<-read.tree("nmt3_sequences_mpr.tree")
# Rooting the tree using midpoint rooting
phy<-midpoint.root(phy)
physeq.ini = merge_phyloseq(OTU,sample_data,TAX,phy)
# Transformation to relative abundance
physeq.rel <- microbiome::transform(physeq.ini, "compositional")
#############################################################################################################
# We determine the size of the core microbiome using a set of abundance and prevalence thresholds #
# see Salonen et al. CMI, 2012 #
#############################################################################################################
# Core line plot
det <- c(0, 0.1, 0.5, 2, 5, 20)/100
prevalences <- seq(.05, 1, .05)
plot_core(physeq.rel, prevalences = prevalences, detections = det, plot.type = "lineplot") + xlab("Relative Abundance in %") + theme_npgray()
## Warning: `legend.margin` must be specified using `margin()`. For the old
## behavior use legend.spacing
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Transformation introduced infinite values in continuous x-axis
#################################################
# Core microbiome visualization using heatmaps #
# Core microbiome definition : #
# with 50% minimal prevalence and minimum #
# detection threshold of 0.001 % #
#################################################
########################################################################
# 50% minimal prevalence and minimum detection threshold of 0.001 #
########################################################################
physeq1.core <- core(physeq.rel, detection = 0.001, prevalence = .5)
# Fetching taxa names corresponding to the core microbiota OTUs
core.taxa <- taxa(physeq1.core)
# Fetching taxonomy corresponding to the core microbiota OTUs
tax.mat <- tax_table(physeq1.core)
tax.df <- as.data.frame(tax.mat)
# Adding OTU identifiers
tax.df$OTU <- rownames(tax.df)
# select taxonomy of only
# those OTUs that are core memebers based on the thresholds that were used.
core.taxa.class <- dplyr::filter(tax.df, rownames(tax.df) %in% core.taxa)
# Saving a file containing the list of OTUs and their taxonomy
setwd(output_dir)
write.table(core.taxa.class,file="core1_dev_taxa_otu.csv",sep=";",row.names=TRUE,col.names=TRUE)
# Selection of core microbiota OTUs in the starting physeq.ini
my_subset <- subset(otu_table(physeq.ini), rownames(otu_table(physeq.ini)) %in% tax.df$OTU)
physeq.core.ini <- merge_phyloseq(my_subset, tax_table(physeq.ini), sample_data(physeq.ini),phy_tree(physeq.ini))
##########################
# Plotting the heat map #
##########################
# Defining the output dir
setwd(output_dir)
prevalences <- seq(.05, 1, .05)
detections <- 10^seq(log10(1e-3), log10(.2), length = 20)
# Graphique en heatmap
plot_core(physeq.rel, plot.type = "heatmap", prevalences = prevalences, detections = detections, min.prevalence = .5,colours = rev(brewer.pal(5, "Spectral")),)+theme_npgray()
## Warning: `legend.margin` must be specified using `margin()`. For the old
## behavior use legend.spacing
ggsave("core_xpdev_heatmap_prev_05_raw.pdf",width=8,height=6)
# I fetched the list of OTUs and their order from the pdf of plot_core
# Then I replace OTUs IDs by a more informative taxonomic name
# A partir du pdf du plot_core sans modification des légendes y (:-o) et je le renseigne dans le fichier core1_taxa_otu.xlsx pour remplacer les noms d'OTU par un nom taxonomique
# Je récupère la liste des OTUs et l'ordre dans lequel ils sont affichés dans le plot brut à partir du core_xpdev_heatmap_prev_05_raw.pdf (sans modification des légendes y (:-o)) et je le renseigne dans le fichier core1_dev_taxa_otu.xlsx pour remplacer les noms d'OTU par un nom taxonomique
plot_core(physeq.rel, plot.type = "heatmap", prevalences = prevalences, detections = detections, min.prevalence = .5,colours = rev(brewer.pal(5, "Spectral")),)+theme_npgray()+scale_x_discrete(breaks=detections,labels=c("0.001","","","","","","0.005","","0.010","","0.016","","","","0.05","","","","",""),name="Detection threshold in % of relative abundance")+scale_y_discrete(labels=c("Desulfovibrio sp","Hydrogenoanaerobacterium sp","Lachnospiraceae NK4A136 group sp","Rhizobiales sp","Hydrogenoanaerobacterium sp","Clostridium sensu stricto 1 sp","Microbacteriaceae bacterium MAEY21","Desulfovibrio sp","Desulfovibrio sp","Alpha proteobacterium UT-4")) + theme(legend.text=element_text(size=10))
## Warning: `legend.margin` must be specified using `margin()`. For the old
## behavior use legend.spacing
ggsave("core_xpdev_heatmap_prev_05.pdf",width=8,height=6)
plot_heatmap(physeq.core.ini,sample.label="Description",low="#66CCFF", high="#000033", na.value="white")+scale_x_discrete(limits=c(
"XT1A4G","XT1A5G","XT1B2G","XT1B2Gbis","XT1B4G","XT1B5G","XT1A2G","XT1B1G","XTLS13","XTLS14","XTLS15","XTLS16","XTLS17","XTLS12","XTLS2","XTLS3","XTLS4","XTLS5","XTLS6","XTLS1","XTLS10","XTLS11","XTLS7","XTLS8","XTLS9","XTLSM1","XTLSM2","XTLSM3","XTLTW2M","XTLTW3F","XTLTW3M","XTLTW4F","XTLTW5F"))+theme_npgray()+ theme(axis.text.x = element_text(angle = 45, hjust = 1))
## Warning in psmelt(physeq): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
## Warning in psmelt(physeq): The sample variables:
## Species
## have been renamed to:
## sample_Species
## to avoid conflicts with taxonomic rank names.
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.
## Warning: `legend.margin` must be specified using `margin()`. For the old
## behavior use legend.spacing
## Warning: Transformation introduced infinite values in discrete y-axis
ggsave("core_xpdev_heatmap_abo_1.pdf",width=8,height=6)
## Warning: Transformation introduced infinite values in discrete y-axis
# Same graph with OTUs in the same order as the prevalence plot, and grouped by developmental stages
plot_heatmap(physeq.core.ini,sample.label="Description",low="#66CCFF", high="#000033", na.value="white")+scale_x_discrete(limits=c(
"XT1A4G","XT1A5G","XT1B2G","XT1B2Gbis","XT1B4G","XT1B5G","XT1A2G","XT1B1G","XTLS13","XTLS14","XTLS15","XTLS16","XTLS17","XTLS12","XTLS2","XTLS3","XTLS4","XTLS5","XTLS6","XTLS1","XTLS10","XTLS11","XTLS7","XTLS8","XTLS9","XTLSM1","XTLSM2","XTLSM3","XTLTW2M","XTLTW3F","XTLTW3M","XTLTW4F","XTLTW5F"))+scale_y_discrete(limits=c("Cluster_53","Cluster_75","Cluster_70","Cluster_6","Cluster_41","Cluster_11","Cluster_2","Cluster_3","Cluster_7","Cluster_1"))+theme_npgray()+ theme(axis.text.x = element_text(angle = 45, hjust = 1))
## Warning in psmelt(physeq): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
## Warning in psmelt(physeq): The sample variables:
## Species
## have been renamed to:
## sample_Species
## to avoid conflicts with taxonomic rank names.
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.
## Warning: `legend.margin` must be specified using `margin()`. For the old
## behavior use legend.spacing
## Warning: Transformation introduced infinite values in discrete y-axis
ggsave("core_xpdev_heatmap_abo_2.pdf",width=8,height=6)
## Warning: Transformation introduced infinite values in discrete y-axis
# Same graph with OTUs changed to their taxonomy
plot_heatmap(physeq.core.ini,sample.label="Description",low="#66CCFF", high="#000033", na.value="white")+scale_x_discrete(limits=c("XT1A4G",
"XT1A5G","XT1B2G","XT1B2Gbis","XT1B4G","XT1B5G","XT1A2G","XT1B1G","XTLS13","XTLS14","XTLS15","XTLS16","XTLS17","XTLS12","XTLS2","XTLS3","XTLS4","XTLS5","XTLS6","XTLS1","XTLS10","XTLS11","XTLS7","XTLS8","XTLS9","XTLSM1","XTLSM2","XTLSM3","XTLTW2M","XTLTW3F","XTLTW3M","XTLTW4F","XTLTW5F"),labels=c("NF55","NF55","NF55","NF55","NF55","NF55","NF55","NF56","NF57","NF58","NF58","NF58","NF58","NF61","NF62","NF62","NF62","NF62","NF62","NF66","NF66","NF66","NF66","NF66","NF66","Adult","Adult","Adult","Adult","Adult","Adult","Adult","Adult"))+scale_y_discrete(limits=c("Cluster_53","Cluster_75","Cluster_70","Cluster_6","Cluster_41","Cluster_11","Cluster_2","Cluster_3","Cluster_7","Cluster_1"),labels=c("Desulfovibrio sp","Hydrogenoanaerobacterium sp","Lachnospiraceae NK4A136 group sp","Rhizobiales sp","Hydrogenoanaerobacterium sp","Clostridium sensu stricto 1 sp","Microbacteriaceae bacterium MAEY21","Desulfovibrio sp","Desulfovibrio sp","alpha proteobacterium UT-4"))+theme_npgray()+ theme(axis.text.x = element_text(angle = 45, hjust = 1))
## Warning in psmelt(physeq): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
## Warning in psmelt(physeq): The sample variables:
## Species
## have been renamed to:
## sample_Species
## to avoid conflicts with taxonomic rank names.
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.
## Warning: `legend.margin` must be specified using `margin()`. For the old
## behavior use legend.spacing
## Warning: Transformation introduced infinite values in discrete y-axis
ggsave("core_xpdev_heatmap_abo_3.pdf",width=8,height=6)
## Warning: Transformation introduced infinite values in discrete y-axis
##################################################################################
# Core with 50% minimal prevalence and minimum detection threshold of 0.00001 #
##################################################################################
physeq2.core <- core(physeq.rel, detection = 0.00001, prevalence = .5)
#Fetching taxa names corresponding to the core microbiota OTUs
core.taxa <- taxa(physeq2.core)
#Fetching taxonomy corresponding to the core microbiota OTUs
tax.mat <- tax_table(physeq2.core)
tax.df <- as.data.frame(tax.mat)
# Adding OTU identifiers
tax.df$OTU <- rownames(tax.df)
# select taxonomy of only
# those OTUs that are core memebers based on the thresholds that were used.
core.taxa.class <- dplyr::filter(tax.df, rownames(tax.df) %in% core.taxa)
# Save a file with OTU IDs and taxonomy
setwd(output_dir)
write.table(core.taxa.class,file="core2_dev_taxa_otu.csv",sep=";",row.names=TRUE,col.names=TRUE)
# Select OTUs from core microbiota in physeq.ini
#
my_subset <- subset(otu_table(physeq.ini), rownames(otu_table(physeq.ini)) %in% tax.df$OTU)
physeq2.core.ini <- merge_phyloseq(my_subset, tax_table(physeq.ini), sample_data(physeq.ini),phy_tree(physeq.ini))
# Heatmap with a much lower abundance thresholds
prevalences <- seq(.1, 1, .1)
detections <- 10^seq(log10(1e-5), log10(.2), length = 20)
plot_core(physeq.rel, plot.type = "heatmap", prevalences = prevalences, detections = detections, min.prevalence = .5,colours = rev(brewer.pal(5, "Spectral")),)+scale_x_discrete(breaks=detections,labels=c("1e-5","","","5e-5","","1e-4","","","","1e-3","","","","","0.015","","","","",""),name="Detection threshold in % of relative abundance")
# Same plot with taxonomy insteat of OTU_ID
plot_heatmap(physeq2.core.ini,sample.label="Description",low="#66CCFF", high="#000033",na.value="white")+scale_x_discrete(limits=c("XT1A4G","XT1A5G","XT1B2G","XT1B2Gbis","XT1B4G","XT1B5G","XT1A2G","XT1B1G","XTLS13","XTLS14","XTLS15","XTLS16","XTLS17","XTLS12","XTLS2","XTLS3","XTLS4","XTLS5","XTLS6","XTLS1","XTLS10","XTLS11","XTLS7","XTLS8","XTLS9","XTLSM1","XTLSM2","XTLSM3","XTLTW2M","XTLTW3F","XTLTW3M","XTLTW4F","XTLTW5F"),labels=c("NF55","NF55","NF55","NF55","NF55","NF55","NF56","NF56","NF57","NF58","NF58","NF58","NF58","NF61","NF62","NF62","NF62","NF62","NF62","NF66","NF66","NF66","NF66","NF66","NF66","Adult","Adult","Adult","Adult","Adult","Adult","Adult","Adult"))
## Warning in psmelt(physeq): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
## Warning in psmelt(physeq): The sample variables:
## Species
## have been renamed to:
## sample_Species
## to avoid conflicts with taxonomic rank names.
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.
## Warning: Transformation introduced infinite values in discrete y-axis