#Message and warning turned off for rendering
#Analysis using Deseq2 and phyloseq
library(phyloseq)
library(DESeq2)
library(ggplot2)
library(phytools)
library(dplyr)
## Folders, Themes, colors
source("prelude.R")

# Setting up directories
data_dir <-paste0(data_dir_path,"xprna")
output_dir <- paste0(output_dir_path,"Figure4_rna")

# Importing data
# Defining the directory containing the data to import
setwd(data_dir)

# Importing abundance data
comm<-read.table("nmt3_abondance_early_tpn_notax.csv",header=TRUE,row.names=1,sep=";")
# Getting rid of OTUs with zero abundance
comm<-comm %>% select_if((function(col) is.numeric(col) && sum(col) > 0))
comm<-t(comm)
#Importing metadata
metadata<-read.csv("nmt3_metadata_rna.csv",sep=";",header=TRUE,row.names=1)

# To fix the order of life stages
metadata$Stage_sorted=factor(metadata$Stage,levels=c("SL_early","TGA_early","SL_late","TGA_late"))

taxmat<-read.csv("nmt3_Gal18_abondance_tax.csv",sep=";",header=TRUE,row.names=1)
taxmat<-as.matrix(taxmat)
OTU = otu_table(comm, taxa_are_rows = TRUE)
TAX = tax_table(taxmat)
sample_data = sample_data(metadata)
#Importing phylogeny
phy<-read.tree("nmt3_sequences_mpr.tree")
#Rooting the tree using midpoint rooting
phy<-midpoint.root(phy)
physeq.ini = merge_phyloseq(OTU,sample_data,TAX,phy)

# We keep OTUs seen more than 100 times in 20% of samples
physeq.ini.f<-filter_taxa(physeq.ini, function(x) sum(x > 10) >= (0.2*length(x)), TRUE)

# How many OTUs in the starting dataset ?
length(row.names(otu_table(physeq.ini)))
## [1] 628
# How many OTUs have been filtered ?
length(row.names(otu_table(physeq.ini.f)))
## [1] 152
# Il y a 30 OTUs sur 628 qui passent le filtre d'abondance et de prévalence.

# Performing the differential analysis using DESeq2
diagdds <- phyloseq_to_deseq2(physeq.ini.f, ~ DevelopmentalStage)
## converting counts to integer mode
diagdds <- DESeq(diagdds, test="Wald", fitType="local")
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
res <- results(diagdds, cooksCutoff = FALSE)
alpha <- 0.01
sigtab <- res[which(res$padj < alpha), ]
sigtab <- cbind(as(sigtab, "data.frame"), as(tax_table(physeq.ini)[rownames(sigtab), ], "matrix"))
sigtab <- sigtab %>% tibble::rownames_to_column(var="OTU_ID")
head(sigtab)
##        OTU_ID   baseMean log2FoldChange     lfcSE      stat       pvalue
## 1  Cluster_23 228.841970      -4.571593 1.1426555 -4.000850 6.311542e-05
## 2  Cluster_12 127.124760      -2.416386 0.4772733 -5.062898 4.129319e-07
## 3 Cluster_132   9.668712      -3.623371 0.9766018 -3.710183 2.071096e-04
## 4 Cluster_232 122.431304     -19.029768 3.6139283 -5.265674 1.396759e-07
## 5 Cluster_163   9.249369      -4.021499 0.9391085 -4.282252 1.850116e-05
## 6  Cluster_46  34.503069      -2.572627 0.6568882 -3.916386 8.988642e-05
##           padj   Domain          Phylum               Class              Order
## 1 1.535808e-03 Bacteria   Bacteroidetes          Cytophagia       Cytophagales
## 2 2.009602e-05 Bacteria   Bacteroidetes      Flavobacteriia   Flavobacteriales
## 3 3.779751e-03 Bacteria   Bacteroidetes      Flavobacteriia   Flavobacteriales
## 4 1.019634e-05 Bacteria Verrucomicrobia    Verrucomicrobiae Verrucomicrobiales
## 5 5.402340e-04 Bacteria  Proteobacteria Alphaproteobacteria    Caulobacterales
## 6 1.874774e-03 Bacteria  Proteobacteria  Betaproteobacteria    Burkholderiales
##                Family            Genus              Species
## 1       Cytophagaceae   Flectobacillus Flectobacillusroseus
## 2   Flavobacteriaceae   Flavobacterium    Multi-affiliation
## 3   Flavobacteriaceae Chryseobacterium  Chryseobacteriumsp.
## 4 Verrucomicrobiaceae      Akkermansia       unknownspecies
## 5    Caulobacteraceae    Brevundimonas    Multi-affiliation
## 6    Oxalobacteraceae         Massilia       unknownspecies
# Defining the output directory
setwd(output_dir)

# Phylum order
x <- tapply(sigtab$log2FoldChange, sigtab$Phylum, function(x) max(x))
x <- sort(x, TRUE)
sigtab$Phylum <- factor(as.character(sigtab$Phylum), levels=names(x))
ggplot(sigtab, aes(y=Phylum, x=log2FoldChange, color=Phylum)) + geom_point(size=4,colour="black")+geom_point(size=3,alpha=0.7)+theme(axis.text.y = element_text(size=rel(0.8)))+scale_colour_manual(values=jcolors)+theme_npgray()

# Family order
ggplot(sigtab, aes(y=Family, x=log2FoldChange, color=Phylum)) + geom_point(size=4,colour="black")+geom_point(size=3,alpha=0.7)+theme(axis.text.y = element_text(size=rel(0.8)))+scale_colour_manual(values=jcolors)+theme_npgray()+facet_grid(~Phylum)+guides(colour=FALSE)+ylab("Family")

ggsave("plot_deseq_rna_by_family.pdf",width=12,height=9)


# Genus order
x <- tapply(sigtab$log2FoldChange, sigtab$Genus, function(x) max(x))
x <- sort(x, TRUE)
sigtab$Genus <- factor(as.character(sigtab$Genus), levels=names(x))
ggplot(sigtab, aes(y=reorder(Genus,log2FoldChange), x=log2FoldChange, color=Phylum)) + geom_point(size=4,colour="black") + geom_point(size=3,alpha=0.7)+theme(axis.text.y = element_text(size=rel(0.8)))+scale_colour_manual(values=jcolors)+theme_npgray()+facet_grid(~Phylum)+guides(colour=FALSE)+ylab("Genus")

ggsave("plot_deseq_rna.pdf",width=12,height=9)

# Presentation using a combinaison of taxonomy and OTUID
sigtab<-mutate(sigtab,otu2genus=paste(Family,Genus,OTU_ID,sep="_"))
ggplot(sigtab, aes(y=reorder(sigtab$otu2genus,log2FoldChange), x=log2FoldChange, color=Phylum)) + geom_point(size=4,colour="black") + geom_point(size=3,alpha=0.7)+theme(axis.text.y = element_text(size=rel(0.8)))+scale_colour_manual(values=jcolors)+theme_npgray()+facet_grid(~Phylum)+guides(colour=FALSE)+ylab("OTUs")

ggsave("plot_deseq_rna_OTU2tax.pdf",width=14,height=9)

# Presentation using only OTUID
ggplot(sigtab, aes(y=reorder(sigtab$OTU_ID,log2FoldChange), x=log2FoldChange, color=Phylum)) + geom_point(size=4,colour="black") + geom_point(size=3,alpha=0.7)+theme(axis.text.y = element_text(size=rel(0.8)))+scale_colour_manual(values=jcolors)+theme_npgray()+facet_grid(~Phylum)+guides(colour=FALSE)+ylab("OTUs")

ggsave("plot_deseq_rna_OTU.pdf",width=12,height=9)

# Selection of differential OTU
my_de_subset <- subset(otu_table(physeq.ini.f),rownames(otu_table(physeq.ini.f)) %in% sigtab$OTU_ID)
physeq.de.ini <- merge_phyloseq(my_de_subset, tax_table(physeq.ini.f), sample_data(physeq.ini.f),phy_tree(physeq.ini.f))



# Heatmap on differential OTUs

# Heatmap without clustering and sorting samples according to their developmental stage
plot_heatmap(physeq.de.ini,sample.label="DevelopmentalStage",low="#66CCFF", high="#000033", na.value="white")

ggsave("plot_heatmap_de_noclustering.pdf",height=6,width=12)

life_stage_order<-c("SD-TGA-PE","SD-SL-PE","SD-TGA-NF41","SD-SL-NF41","SD-TGA-NF48","SD-SL-NF48","SD-TGA-NF50","SD-SL-NF50","SD-TGA-NF52","SD-SL-NF52","SD-TGA-iNF52","SD-SL-iNF52","SD-TGA-iNF56","SD-SL-iNF56","SD-TGA-iNF60","SD-SL-iNF60","SD-TGA-iNF63","SD-SL-iNF63","SD-TGA-iNF66","SD-SL-iNF66")
plot_heatmap(physeq.de.ini,sample.label="DevelopmentalStage",low="#66CCFF", high="#000033", na.value="white")+scale_x_discrete(limits=life_stage_order)
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.

# Heatmap using PCoA for community composition
plot_heatmap(physeq.de.ini,method="PCoA",distance="unifrac",sample.label="DevelopmentalStage",low="#66CCFF", high="#000033", na.value="white") + ylab("OTUs")+ theme_npgray()+ theme(axis.text.x = element_text(angle = 45, hjust = 1))+facet_grid(~Stage_sorted,scale="free_x", space="free_x")

ggsave("plot_heatmap_de_clustering.pdf",height=6,width=12)


# We take a look of the abundance of OTUs identified as differential in the development seie.
# The list corresponds to sigtab$OTU_ID from the deseq2 analysis of the Development experiment.
dev_deOTUS=c("Cluster_13","Cluster_47","Cluster_153","Cluster_144","Cluster_10","Cluster_2","Cluster_6","Cluster_1","Cluster_3","Cluster_53","Cluster_123","Cluster_68","Cluster_40","Cluster_85","Cluster_77","Cluster_88","Cluster_11","Cluster_17","Cluster_228","Cluster_25","Cluster_60","Cluster_51")
my_de_subset <- subset(otu_table(physeq.ini),rownames(otu_table(physeq.ini)) %in% dev_deOTUS)
physeq.deDEV.ini <- merge_phyloseq(my_de_subset, tax_table(physeq.ini), sample_data(physeq.ini),phy_tree(physeq.ini))
plot_heatmap(physeq.deDEV.ini,sample.label="Description",low="#66CCFF", high="#000033", na.value="white")+scale_x_discrete(limits= life_stage_order)
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.