METATETARD

Phylum abundance analysis of OTUs

on 16S rRNA RNA data

# Message and warning turned off for rendering
# Analysis using phyloseq
library(phyloseq)
library(ggplot2)
library(phytools)
library(dplyr)
library(gridExtra)
# Warning turned off for rendering
## Folders, Themes, colors
source("prelude.R")

# Setting up directories
data_dir <-paste0(data_dir_path,"xplate")
output_dir <- paste0(output_dir_path,"Figure_late")

####################################################
# Figure barplot proportion of most abundant phyla #
####################################################
# Importing data
# Defining the directory containing the data to import
setwd(data_dir)

# Working on abundance data from 100 rarefactions
comm<-read.table("xplate_fichier_allcom.txt", header=TRUE, row.names=1)
# Getting rid of OTUs with zero abundance
comm<-comm %>% select_if((function(col) is.numeric(col) && sum(col) > 0))
comm<-t(comm)
#Importing metadata
metadata<-read.csv("nmt3_metadata_late.csv",sep=";",header=TRUE,row.names=1)
taxmat<-read.csv("nmt3_Gal18_abondance_tax.csv",sep=";",header=TRUE,row.names=1)
taxmat<-as.matrix(taxmat)
OTU<-otu_table(comm, taxa_are_rows = TRUE)
TAX<-tax_table(taxmat)
sample_data<-sample_data(metadata)
#Importing phylogeny
phy<-read.tree("nmt3_sequences_mpr.tree")
#Rooting the tree using midpoint rooting
phy<-midpoint.root(phy)
physeq.ini<-merge_phyloseq(OTU,sample_data,TAX,phy)

#2 - We fuse taxa at the phylum level
physeq.ini.Rg<-tax_glom(physeq.ini, taxrank="Phylum")

#3 - Transforming abundance counts into proportions
physeq.ini.Rgm <- transform_sample_counts(physeq.ini.Rg, function(x) 100 * x/sum(x))

#4 - Filtering OTUs
#We keep OTUs with at least 0.5% relative abundance in a given sample
physeq.ini.Rfgm = filter_taxa(physeq.ini.Rgm,function(x) mean(x)>0.5,TRUE)

#5 - We merge samples corresponding to the same Developmental Stage category
# Transform all variables to factors for merging (trick required for R4.0)
df <- as.data.frame(lapply(sample_data(physeq.ini.Rfgm),function (y) if(class(y)!="factor" ) as.factor(y) else y),stringsAsFactors=T)
row.names(df) <- sample_names(physeq.ini.Rfgm)
sample_data(physeq.ini.Rfgm) <- sample_data(df)
#Merging
physeq.ini.Rfgm2 = merge_samples(physeq.ini.Rfgm, "DevelopmentalStage")

#6 - We recompute the relative abundance because the sample merge sums up the relative abundance of each merged sample
physeq.ini.Rf2gm2 = transform_sample_counts(physeq.ini.Rfgm2, function(x) 100 * x/sum(x))

# We need to recover the good legends corresponding to the category used for merging samples
sample_data(physeq.ini.Rf2gm2)$DevelopmentalStage <- levels(sample_data(physeq.ini.Rg)$DevelopmentalStage)

#7 - We can go on with the abundance plot itself
# Defining the output dir
setwd(output_dir)
# Plotting
plot_stade_late_compo2<-plot_bar(physeq.ini.Rfgm, "DevelopmentalStage", fill = "Phylum")+ ylab("Percentage of Sequences")+ scale_fill_manual(values=jcolors)+theme_nb()+theme(axis.text.x = element_text(angle = 0,hjust=0.5))+coord_flip()+scale_x_discrete(limits=c("NF66","NF63","NF60","NF56","NF52"))+facet_grid(~Population,scale="free_x",space="free_x")
plot_stade_late_compo2

ggsave("plot_stade_late_compo.pdf",width=10,height=7)



#######################
#   Ordination plots  #
#######################

#Ordination using unifrac and Principal Coordinate Analysis (PCoA)
# We filter OTUs with abundance of more than 0.001% and convert into relative abundance

count2proportion = function(x){
    x[(x / sum(x)) < (1e-5)] <- 0
    return(x)
}
MTL<-transform_sample_counts(physeq.ini,fun=count2proportion)

ordub <- ordinate(MTL, "PCoA", "unifrac", weighted=FALSE)
puw <- plot_ordination(MTL, ordub, shape="LifeStage")
plotuw<-puw+geom_point(size=3,alpha=0.5,fill="grey")+scale_shape_manual(values=c(15,0,16,1,6))+labs(shape="LifeStage")
plotuw<-plotuw+ggtitle("Community membership")+theme_npgray()
plotuw<-plotuw +theme(legend.position="top")
plotuw

ggsave("MDSPCOA_unweighted_UNIFRAC_late.pdf",width=8,height=6)

###########################################
# Comparing distances in ordination plots #
###########################################
#Loading the required function from phyloseq_extended

source("https://raw.githubusercontent.com/mahendra-mariadassou/phyloseq-extended/master/R/graphical_methods.R")
## Loading required package: scales
## Loading required package: reshape2
p1 <- plot_samples(MTL, ordinate(MTL, "MDS", "cc"), color = "LifeStage") + ggtitle("MDS + Jaccard") +guides(color=FALSE)
p2 <- plot_samples(MTL, ordinate(MTL, "MDS", "unifrac"), color = "LifeStage") + ggtitle("MDS + UniFrac") + guides(color=FALSE)
p3 <- plot_samples(MTL, ordinate(MTL, "MDS", "bray"), color = "LifeStage") + ggtitle("MDS + Bray-Curtis") + guides(color=FALSE)
p4 <- plot_samples(MTL, ordinate(MTL, "MDS", "wunifrac"), color = "LifeStage") + ggtitle("MDS + weighted UniFrac") + guides(color=FALSE)
grid.arrange(p1, p2, p3, p4, ncol = 2, nrow = 2)