METATETARD

Analysis of Phylum and Family abundances

on early embryos and tadpoles

serie 16S rRNA RNA data

#Xenopus metatetard XPEARLY

# Warnings and message turned off for rendering
library(phyloseq)
library(ggplot2)
library(phytools)
library(dplyr)
library(gridExtra)
## Warnings turned off for rendering.
## Folders, Themes, colors
source("prelude.R")

# Setting up directories
data_dir <-paste0(data_dir_path,"xpearly")
output_dir <- paste0(output_dir_path,"Supp_Figure_5")


####################################################
# Figure barplot proportion of most abundant phyla #
####################################################
# Importing data
# Defining the directory containing the data to import
setwd(data_dir)


# Working on abundance data from 100 rarefactions
comm<-read.table("xpearly_fichier_allcom.txt", header=TRUE, row.names=1)
# Getting rid of OTUs with zero abundance
comm<-comm %>% select_if((function(col) is.numeric(col) && sum(col) > 0))
comm<-t(comm)
#Importing metadata
metadata<-read.csv("nmt3_metadata_tearlydev.csv",sep=";",header=TRUE,row.names=1)
taxmat<-read.csv("nmt3_Gal18_abondance_tax.csv",sep=";",header=TRUE,row.names=1)
taxmat<-as.matrix(taxmat)
OTU<-otu_table(comm, taxa_are_rows = TRUE)
TAX<-tax_table(taxmat)
sample_data<-sample_data(metadata)
#Importing phylogeny
phy<-read.tree("nmt3_sequences_mpr.tree")
#Rooting the tree using midpoint rooting
phy<-midpoint.root(phy)
physeq.ini = merge_phyloseq(OTU,sample_data,TAX,phy)

#2 - We fuse taxa at the phylum level
physeq.ini.Rg<-tax_glom(physeq.ini, taxrank="Phylum")

#3 - Transforming abundance counts into proportions
physeq.ini.Rgm = transform_sample_counts(physeq.ini.Rg, function(x) 100 * x/sum(x))

#4 - Filtering OTUs
#We keep OTUs with at least 0.5% relative abundance in a given sample
physeq.ini.Rfgm = filter_taxa(physeq.ini.Rgm,function(x) mean(x)>0.5,TRUE)

#5 - We can go on with the abundance plot itself
# Defining the output dir
setwd(output_dir)
#Plotting
plot_stade_early_compo2<-plot_bar(physeq.ini.Rfgm, "Sample", fill = "Phylum")+ ylab("Percentage of Sequences")+ scale_fill_manual(values=jcolors)+theme_nb()+theme(axis.text.x = element_text(angle = 0,hjust=0.5))+scale_x_discrete(limits=c("SD-SL-PE","SD-SL-NF41","SD-SL-NF48","SD-SL-NF50","SD-SL-NF52","SD-TGA-PE","SD-TGA-NF41","SD-TGA-NF48","SD-TGA-NF50","SD-TGA-NF52"))+coord_flip()
plot_stade_early_compo2

ggsave("plot_stade_early_compo.pdf",width=7,height=5)


plot_early_compoSL<-plot_bar(physeq.ini.Rfgm, "Sample", fill = "Phylum")+ ylab("Percentage of Sequences")+ scale_fill_manual(values=jcolors)+theme_nb()+theme(axis.text.x = element_text(angle = 0,hjust=0.5))+scale_x_discrete(limits=c("SD-SL-NF52","SD-SL-NF50","SD-SL-NF48","SD-SL-NF41","SD-SL-PE"),labels=c("A-NF52","A-NF50","A-NF48","A-NF41","A-NF30"))+coord_flip()+facet_wrap(~Phylum)


plot_early_compoTGA<-plot_bar(physeq.ini.Rfgm, "Sample", fill = "Phylum")+ ylab("Percentage of Sequences")+ scale_fill_manual(values=jcolors)+theme_nb()+theme(axis.text.x = element_text(angle = 0,hjust=0.5))+scale_x_discrete(limits=c("SD-TGA-NF52","SD-TGA-NF50","SD-TGA-NF48","SD-TGA-NF41","SD-TGA-PE"),labels=c("B-NF52","B-NF50","B-NF48","B-NF41","B-NF30"))+coord_flip()+facet_wrap(~Phylum)

grid.arrange(plot_early_compoSL, plot_early_compoTGA,nrow=2)

#pdf("plot_stade_early_compo2_phylum_TGASL.pdf",width=7,height=10)
#print(grid.arrange(plot_early_compoSL, plot_early_compoTGA,nrow=2))
#dev.off()



#######################
#   Ordination plots  #
#######################

# We keep OTUs seen more than 5 times in 5% of samples
MT1 <-  filter_taxa(physeq.ini, function(x) sum(x > 5) > (0.05*length(x)), TRUE)
# How many OTUs in our starting data ?
length(row.names(otu_table(physeq.ini)))
## [1] 562
# How many OTUs have been filtered ?
length(row.names(otu_table(MT1)))
## [1] 562
# Il y a 562 OTUs sur 562 qui passent le filtre d'abondance et de prévalence.


# We filter OTUs with abundance of more than 0.001% and convert into relative abundance
count2proportion <- function(x){
    x[(x / sum(x)) < (1e-5)] <- 0
    return(x)
}
MT1<-transform_sample_counts(MT1,fun=count2proportion)


###########################################
# Comparing distances in ordination plots #
###########################################
#Loading the required function from phyloseq_extended
source("https://raw.githubusercontent.com/mahendra-mariadassou/phyloseq-extended/master/R/graphical_methods.R")
## Loading required package: scales
## Loading required package: reshape2
# Ordination according to developmental stage (colors) and cross (shape)
p1 <- plot_samples(MT1, ordinate(MT1, "MDS", "cc"), color = "DevelopmentalStage",shape="Population")  + ggtitle("MDS + Jaccard") + guides(color=FALSE,shape=FALSE)
p2 <- plot_samples(MT1, ordinate(MT1, "MDS", "unifrac"), color = "DevelopmentalStage",shape="Population")  + ggtitle("MDS + UniFrac") + guides(color=FALSE,shape=FALSE)
p3 <- plot_samples(MT1, ordinate(MT1, "MDS", "bray"), color = "DevelopmentalStage",shape="Population")  + ggtitle("MDS + Bray-Curtis") + guides(color=FALSE,shape=FALSE)
p4 <- plot_samples(MT1, ordinate(MT1, "MDS", "wunifrac"), color = "DevelopmentalStage",shape="Population") + ggtitle("MDS + weighted UniFrac") + guides(color=FALSE,shape=FALSE)
grid.arrange(p1, p2, p3, p4, ncol = 2, nrow = 2)

# Ordination according to life stage (colors) and cross (shape)
p1 <- plot_samples(MT1, ordinate(MT1, "MDS", "cc"), color = "LifeStage",shape="Population")  + ggtitle("MDS + Jaccard") +guides(color=FALSE,shape=FALSE)
p2 <- plot_samples(MT1, ordinate(MT1, "MDS", "unifrac"), color = "LifeStage",shape="Population")  + ggtitle("MDS + UniFrac") +guides(color=FALSE,shape=FALSE)
p3 <- plot_samples(MT1, ordinate(MT1, "MDS", "bray"), color = "LifeStage",shape="Population")  + ggtitle("MDS + Bray-Curtis") +guides(color=FALSE,shape=FALSE)
p4 <- plot_samples(MT1, ordinate(MT1, "MDS", "wunifrac"), color = "LifeStage",shape="Population") + ggtitle("MDS + weighted UniFrac") +guides(color=FALSE,shape=FALSE)
grid.arrange(p1, p2, p3, p4, ncol = 2, nrow = 2)