#############################################
#               METATETARD                  #
# Community analysis of OTUs                #
# on developmental serie 16S rRNA GENE data #
# XPDEV                                     #
#############################################
# Analysis using phyloseq
library(phyloseq)
library(ggplot2)
library(phytools)
## Loading required package: ape
## Loading required package: maps
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
library(metagMisc)

## Folders, Themes, colors
source("prelude.R")

# Setting up directories
data_dir <-paste0(data_dir_path,"xpdev")
output_dir <- paste0(output_dir_path,"Figure2_ordination")

#######################
#   Ordination plots  #
#######################
# Importing data
# Defining the directory containing the data to import
setwd(data_dir)

# Importing abundance data
comm<-read.table("nmt3_abondance_tpn_notax_XP_Dev.asc",header=TRUE,row.names=1)
# Getting rid of OTUs with zero abundance
comm<-comm %>% select_if((function(col) is.numeric(col) && sum(col) > 0))
comm<-t(comm)
#Importing metadata
metadata<-read.csv("nmt3_2_metadata_xp_dev.csv",sep=";",header=TRUE,row.names=1)
taxmat<-read.csv("nmt3_Gal18_abondance_tax.csv",sep=";",header=TRUE,row.names=1)
taxmat<-as.matrix(taxmat)
OTU <- otu_table(comm, taxa_are_rows = TRUE)
TAX <- tax_table(taxmat)
sample_data <- sample_data(metadata)
#Importing phylogeny
phy<-read.tree("nmt3_sequences_mpr.tree")
#Rooting the tree using midpoint rooting
phy<-midpoint.root(phy)
physeq.ini<-merge_phyloseq(OTU,sample_data,TAX,phy)

##############################################################
# Working with proportions instead of counts (McKnight 2018) #
##############################################################

#Setting the working directory for output files
setwd(output_dir)

# We keep OTUs seen more than 5 times in 5% of samples
MT1 <-  filter_taxa(physeq.ini, function(x) sum(x > 5) > (0.05*length(x)), TRUE)
# How many OTUs in our starting data ?
length(row.names(otu_table(physeq.ini)))
## [1] 666
# How many OTUs have been filtered ?
length(row.names(otu_table(MT1)))
## [1] 426
# There are 426 OTUs out of 666 after filtering for low abundance and low prevalence.


# We convert into relative abundance
count2proportion <- function(x){
    x[(x / sum(x))] <- 0
    return(x)
}
MT1<-transform_sample_counts(MT1,fun=count2proportion)

#Ordination using weighted unifrac and Principal Coordinate Analysis (PCoA)
ordu<-ordinate(MT1, "PCoA", "unifrac", weighted=TRUE)
#Uncomment for plotting the ordination results in black and white
#pw<-plot_ordination(MT1, ordu, shape="LifeStage")
#plotw<-pw+geom_point(size=3,alpha=0.5,fill="grey")+scale_shape_manual(values=c(15,0,16,1,6),breaks=c("Premetamorph","Prometamorph","Metamorph","Froglet","Adult"))+labs(shape="LifeStage")
#plotw<-plotw+ggtitle("Community structure")+theme_npgray()
#plotw<-plotw + stat_ellipse(type="norm",linetype=2)+theme(legend.position="top")
#plotw
#ggsave("MDSPCOA_weighted_UNIFRAC_2.pdf",width=8,height=6)
#Removing the legend for the next plot with grid arrange
#plotw<-plotw+guides(shape=FALSE)

#Plotting the ordination results in color
pwc<-plot_ordination(MT1, ordu, color="LifeStage")
plotwc<-pwc+geom_point(size=3,alpha=0.5,fill="grey")+scale_color_manual(values=c("#d73027","#fc8d59","#4d4d4d","#91bfdb","#4575b4"),breaks=c("Premetamorph","Prometamorph","Metamorph","Froglet","Adult"))
plotwc<-plotwc+ggtitle("Community structure")+theme_npgray()
plotwc<-plotwc + stat_ellipse(type="norm",linetype=2)+theme(legend.position="top")
plotwc

ggsave("MDSPCOA_weighted_UNIFRAC_2_color.pdf",width=8,height=6)
#Removing the legend for the next plot with grid arrange
plotwc<-plotwc+guides(color=FALSE)

#Ordination using unweighted unifrac and Principal Coordinate Analysis (PCoA)
ordub<-ordinate(MT1, "PCoA", "unifrac", weighted=FALSE)
#Uncomment for plotting the ordination results in black and white
#puw<-plot_ordination(MT1, ordub, shape="LifeStage")
#plotuw<-puw+geom_point(size=3,alpha=0.5,fill="grey")+scale_shape_manual(values=c(15,0,16,1,6))+labs(shape="LifeStage")
#plotuw<-plotuw+ggtitle("Community membership")+theme_npgray()
#plotuw<-plotuw + stat_ellipse(type="norm",linetype=2)+theme(legend.position="top")
#plotuw
#ggsave("MDSPCOA_unweighted_UNIFRAC_2.pdf",width=8,height=6)
#Removing the legend for the next plot with grid arrange
#plotuw<-plotuw+guides(shape=FALSE)
#plotuw

# Plotting the ordination results in color
puwc<-plot_ordination(MT1, ordub, color="LifeStage")
plotuwc<-puwc+geom_point(size=3,alpha=0.5)+scale_color_manual(values=c("#d73027","#fc8d59","#4d4d4d","#91bfdb","#4575b4"),breaks=c("Premetamorph","Prometamorph","Metamorph","Froglet","Adult"))
plotuwc<-plotuwc+ggtitle("Community membership")+theme_npgray()
plotuwc<-plotuwc + stat_ellipse(type="norm",linetype=2)+theme(legend.position="top")
plotuwc

ggsave("MDSPCOA_unweighted_UNIFRAC_2_color.pdf",width=8,height=6)
#Removing the legend for the next plot with grid arrange
plotuwc<-plotuwc+guides(color=FALSE)
plotuwc

ggsave("MDSPCOA_unweighted_UNIFRAC_2_color_nolegend.pdf",width=8,height=6)



# Side by side plots in color
pdf("plotMDSPCOA_wuw_2.pdf",width=12,height=6)
print(grid.arrange(plotuwc, plotwc, ncol=2))
## TableGrob (1 x 2) "arrange": 2 grobs
##   z     cells    name           grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (1-1,2-2) arrange gtable[layout]
dev.off()
## quartz_off_screen 
##                 2
# Split plot
p4 <- plot_ordination(MT1, ordu, type="split", color="Phylum", shape="LifeStage")
gg_color_hue <- function(n){
    hues <- seq(15, 375, length=n+1)
    hcl(h=hues, l=65, c=100)[1:n]
}
color.names <- levels(p4$data$Phylum)
p4cols <- gg_color_hue(length(color.names))
names(p4cols) <- color.names
p4cols["samples"] <- "black"
p4+geom_point(size=3,alpha=0.75)+scale_shape_manual(values=c(19,15,0,16,1,6))+ scale_color_manual(values=p4cols)+theme_npgray()+labs(shape="LifeStage")

ggsave("MDSPCOA_weighted_UNIFRAC_split_2.pdf",width=14,height=7)
p5<-p4+geom_point(size=3,alpha=0.75)+scale_shape_manual(values=c(19,15,0,16,1,6))+ scale_color_manual(values=p4cols)+theme_npgray()+labs(shape="LifeStage")

p5+facet_wrap(~Phylum)

ggsave("MDSPCOA_weighted_UNIFRAC_split_wrap_2.pdf",width=12,height=12)


###########################################
# Comparing distances in ordination plots #
###########################################
#Loading the required function from phyloseq_extended
source("https://raw.githubusercontent.com/mahendra-mariadassou/phyloseq-extended/master/R/graphical_methods.R")
## Loading required package: scales
## Loading required package: reshape2
p1 <- plot_samples(MT1, ordinate(MT1, "MDS", "bray"), color = "LifeStage") + stat_ellipse(type="norm",linetype=2) + ggtitle("MDS + Bray-Curtis") + theme_bw()+guides(color=FALSE)
p2 <- plot_samples(MT1, ordinate(MT1, "MDS", "cc"), color = "LifeStage") + stat_ellipse(type="norm",linetype=2) + ggtitle("MDS + Jaccard") + theme_bw()+guides(color=FALSE)
p3 <- plot_samples(MT1, ordinate(MT1, "MDS", "unifrac"), color = "LifeStage") + stat_ellipse(type="norm",linetype=2) + ggtitle("MDS + UniFrac") + theme_bw()+guides(color=FALSE)
p4 <- plot_samples(MT1, ordinate(MT1, "MDS", "wunifrac"), color = "LifeStage") + stat_ellipse(type="norm",linetype=2) + ggtitle("MDS + weighted UniFrac") + theme_bw()+guides(color=FALSE)
grid.arrange(p1, p2, p3, p4, ncol = 2, nrow = 2)

pA <- plot_samples(MT1, ordinate(MT1, "PCoA", "unifrac"), color = "LifeStage") + stat_ellipse(type="norm",linetype=2) + ggtitle("PCoA + UniFrac") + theme_bw()+guides(color=FALSE)
pB <- plot_samples(MT1, ordinate(MT1, "PCoA", "wunifrac"), color = "LifeStage") + stat_ellipse(type="norm",linetype=2) + ggtitle("PCoA + weighted UniFrac") + theme_bw()+guides(color=FALSE)