#############################################
#               METATETARD                  #
# Phylum abundance analysis of OTUs         #
# on adult tissue serie 16S rRNA GENE data  #
# XPGUT                                     #
#############################################
# Analysis using phyloseq
library(phyloseq)
library(ggplot2)
library(phytools)
## Loading required package: ape
## Loading required package: maps
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
## Folders, Themes, colors
source("prelude.R")

# Setting up directories
data_dir <-paste0(data_dir_path,"xpgut")
output_dir <- paste0(output_dir_path,"Figure3_gut")

####################################################
# Figure barplot proportion of most abundant phyla #
####################################################
# Importing data
# Defining the directory containing the data to import
setwd(data_dir)

# Working on abundance data from 100 rarefactions
comm<-read.table("xpgut_fichier_allcom.txt", header=TRUE, row.names=1)

# Getting rid of OTUs with zero abundance
comm<-comm %>% select_if((function(col) is.numeric(col) && sum(col) > 0))
comm<-t(comm)

#Importing metadata
metadata<-read.csv("nmt3_2_metadata_AdultTissue.csv",sep=";",header=TRUE,row.names=1)
taxmat<-read.csv("nmt3_Gal18_abondance_tax.csv",sep=";",header=TRUE,row.names=1)
taxmat<-as.matrix(taxmat)
OTU<-otu_table(comm, taxa_are_rows = TRUE)
TAX<-tax_table(taxmat)
sample_data<-sample_data(metadata)
#Importing phylogeny
phy<-read.tree("nmt3_sequences_mpr.tree")
#Rooting the tree using midpoint rooting
phy<-midpoint.root(phy)
physeq.ini<-merge_phyloseq(OTU,sample_data,TAX,phy)

#2 - We fuse taxa at the phylum level
physeq.ini.Rg<-tax_glom(physeq.ini, taxrank="Phylum")

#3 - Transforming abundance counts into proportions
physeq.ini.Rgm = transform_sample_counts(physeq.ini.Rg, function(x) 100 * x/sum(x))

#4 - Filtering OTUs
#We keep OTUs with at least 0.5% relative abundance in a given sample
physeq.ini.Rfgm = filter_taxa(physeq.ini.Rgm,function(x) mean(x)>0.5,TRUE)

#5 - We merge samples corresponding to the same Developmental Stage category
# Transform all variables to factors for merging (trick required for R4.0)
df <- as.data.frame(lapply(sample_data(physeq.ini.Rfgm),function (y) if(class(y)!="factor" ) as.factor(y) else y),stringsAsFactors=T)
row.names(df) <- sample_names(physeq.ini.Rfgm)
sample_data(physeq.ini.Rfgm) <- sample_data(df)
#Merging
physeq.ini.Rfgm2 = merge_samples(physeq.ini.Rfgm, "Tissue")

#6 - We recompute the relative abundance because the sample merge sums up the relative abundance of each merged sample
physeq.ini.Rf2gm2 = transform_sample_counts(physeq.ini.Rfgm2, function(x) 100 * x/sum(x))

# We need to recover the good legends corresponding to the category used for merging samples
sample_data(physeq.ini.Rf2gm2)$Tissue <- levels(sample_data(physeq.ini.Rg)$Tissue)

#7 - We can go on with the abundance plot itself
# Defining the output dir
setwd(output_dir)

# Plotting by Adult tissue
plot_stade_adult_tissues<-plot_bar(physeq.ini.Rf2gm2, fill = "Phylum")+ ylab("Percentage of Sequences")+ scale_fill_manual(values=jcolors)+theme_nb()+theme(axis.text.x = element_text(angle = 0,hjust=0.5))+scale_x_discrete(limits=c("Skin","Feces","Colon","Intestine","Gut","Stomach"))+coord_flip()
## Warning in psmelt(physeq): The sample variables: 
## Sample
##  have been renamed to: 
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
## Warning in psmelt(physeq): The sample variables: 
## Species, Class
##  have been renamed to: 
## sample_Species, sample_Class
## to avoid conflicts with taxonomic rank names.
## Warning: `axis.ticks.margin` is deprecated. Please set `margin` property of
## `axis.text` instead
plot_stade_adult_tissues

ggsave("plot_stade_adult_tissues.pdf",width=7,height=5)

#8 Plotting by phylum
p<-plot_bar(physeq.ini.Rf2gm2,facet_grid=~Phylum,fill = "Phylum")+theme_nb()+theme(axis.text.x = element_text(angle = 0,hjust=0.5))+ylab("Abundance in %")+scale_fill_manual(values=jcolors)+scale_x_discrete(limits=c("Skin","Feces","Colon","Intestine","Gut","Stomach"),labels=c("E","F","C","I","G","S"))
## Warning in psmelt(physeq): The sample variables: 
## Sample
##  have been renamed to: 
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
## Warning in psmelt(physeq): The sample variables: 
## Species, Class
##  have been renamed to: 
## sample_Species, sample_Class
## to avoid conflicts with taxonomic rank names.
## Warning: `axis.ticks.margin` is deprecated. Please set `margin` property of
## `axis.text` instead
p

ggsave("plot_stade_compophylum.pdf",width=9,height=5)

########################
#   Ordination plots  #
#######################

# We keep OTUs seen more than 5 times in 5% of samples
MT1<-filter_taxa(physeq.ini, function(x) sum(x > 5) > (0.05*length(x)), TRUE)
#How many OTUs in the starting data set ?
length(row.names(otu_table(physeq.ini)))
## [1] 758
# How many OTUs have been filtered ?
length(row.names(otu_table(MT1)))
## [1] 736
# Il y a 736 OTUs sur 758 qui passent le filtre d'abondance et de prévalence.

# We filter OTUs with abundance of more than 0.001% and convert into relative abundance
count2proportion <- function(x){
    x[(x / sum(x)) < (1e-5)] <- 0
    return(x)
}
MT1<-transform_sample_counts(MT1,fun=count2proportion)

# Weighted unifrac - Community structure
ordu<-ordinate(MT1, "PCoA", "unifrac", weighted=TRUE)
pw<-plot_ordination(MT1, ordu, shape="Tissue")
plotw<-pw+geom_point(size=5,alpha=0.5,fill="grey")+scale_shape_manual(values=c(15,0,16,1,17,2))+labs(shape="Sample")
plotw<-plotw+ggtitle("Community structure")+theme_npgray()+theme(legend.position="top")
## Warning: `legend.margin` must be specified using `margin()`. For the old
## behavior use legend.spacing
plotw

ggsave("MDSPCOA_weighted_UNIFRAC.pdf",width=8,height=6)
#Removing the legend for the next plot with grid arrange
plotw<-plotw+guides(shape=FALSE)



# Unweighted unifrac - Community membership
ordub<-ordinate(MT1, "PCoA", "unifrac", weighted=FALSE)
puw<-plot_ordination(MT1, ordub, shape="Tissue")
plotuw<-puw+geom_point(size=5,alpha=0.5,fill="grey")+scale_shape_manual(values=c(15,0,16,1,17,2))+labs(shape="Sample")
plotuw<-plotuw+ggtitle("Community membership")+theme_npgray()
## Warning: `legend.margin` must be specified using `margin()`. For the old
## behavior use legend.spacing
plotuw

ggsave("MDSPCOA_unweighted_UNIFRAC.pdf",width=8,height=6)
#plotuw+theme(legend.position="top")
#ggsave("MDSPCOA_unweighted_top_UNIFRAC.pdf",width=9,height=7)
#Removing the legend for the next plot with grid arrange
plotuw<-plotuw+guides(shape=FALSE)

# For presentations
#pdf("plotMDSPCOA_wuw.pdf",width=12,height=6)
#print(grid.arrange(plotuw, plotw, ncol=2))
#dev.off()

# Test the significance of unifrac distances
library("picante")
## Loading required package: vegan
## Loading required package: permute
## Loading required package: lattice
## This is vegan 2.5-6
## 
## Attaching package: 'vegan'
## The following object is masked from 'package:phytools':
## 
##     scores
## Loading required package: nlme
## 
## Attaching package: 'nlme'
## The following object is masked from 'package:dplyr':
## 
##     collapse
# beta diversity: compute unweighted unifrac distance
uudist<-distance(physeq.ini,method="uunifrac")
adonis(uudist~Tissue,data=metadata)
## 
## Call:
## adonis(formula = uudist ~ Tissue, data = metadata) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)    
## Tissue     5    2.9852 0.59705  6.4641 0.41801  0.001 ***
## Residuals 45    4.1564 0.09236         0.58199           
## Total     50    7.1416                 1.00000           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# beta diversity: compute weighted unifrac distance
uwdist<-distance(physeq.ini,method="wunifrac")
adonis(uwdist~Tissue,data=metadata)
## 
## Call:
## adonis(formula = uwdist ~ Tissue, data = metadata) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model     R2 Pr(>F)    
## Tissue     5    4.4947 0.89894  10.447 0.5372  0.001 ***
## Residuals 45    3.8721 0.08605         0.4628           
## Total     50    8.3668                 1.0000           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
###########################################
# Comparing distances in ordination plots #
###########################################
#Loading the required function from phyloseq_extended

source("https://raw.githubusercontent.com/mahendra-mariadassou/phyloseq-extended/master/R/graphical_methods.R")
## Loading required package: scales
## Loading required package: reshape2
p1 <- plot_samples(MT1, ordinate(MT1, "MDS", "cc"), color = "Tissue") + ggtitle("MDS + Jaccard") +guides(color=FALSE)
p2 <- plot_samples(MT1, ordinate(MT1, "MDS", "unifrac"), color = "Tissue") + ggtitle("MDS + UniFrac") +guides(color=FALSE)
p3 <- plot_samples(MT1, ordinate(MT1, "MDS", "bray"), color = "Tissue") + ggtitle("MDS + Bray-Curtis") +guides(color=FALSE)
p4 <- plot_samples(MT1, ordinate(MT1, "MDS", "wunifrac"), color = "Tissue") + ggtitle("MDS + weighted UniFrac") +guides(color=FALSE)
grid.arrange(p1, p2, p3, p4, ncol = 2, nrow = 2)

There is no major differences between the metrics to assess community membership (Jaccard and unweighted UniFrac). This is hinting at phylogenetically unrelated taxa making up the distinctions of communities between the tissues. We do see differences between the two metrics to assess community structure (Bray-Curtis and weighted unifrac). Using Bray-Curtis, intestine and stomach communities are more similar to each other; and colon, feces and whole gut are grouped. Using weighted UniFrac, the intestine is more similar to skin than to stomach. This may stem from sampling depth effect more pronounced on Bray-Curtis distance.