#############################################
#               METATETARD                  #
# Community analysis of OTUs                #
# on developmental serie 16S rRNA GENE data #
# XPDEV                                     #
#############################################
#Exporting the data to build Venn diagram
library(phyloseq)
library(ggplot2)
library(phytools)
## Loading required package: ape
## Loading required package: maps
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## Folders, Themes, colors
source("prelude.R")

# Setting up directories
data_dir <-paste0(data_dir_path,"xpdev")
output_dir <- paste0(output_dir_path,"Supp_Figure_2")

####################################################
#1 Importing abundance data from 100 rarefactions  #
####################################################

# Importing data
# Defining the directory containing the data to import
setwd(data_dir)
# Importing abundance data
# Working on abundance data from 100 rarefactions
comm<-read.table("xpdev_fichier_allcom.txt", header=TRUE, row.names=1)

# Getting rid of OTUs with zero abundance
comm<-comm %>% select_if((function(col) is.numeric(col) && sum(col) > 0))
comm<-t(comm)
#Importing metadata
metadata<-read.csv("nmt3_2_metadata_xp_dev.csv",sep=";",header=TRUE,row.names=1)
taxmat<-read.csv("nmt3_Gal18_abondance_tax.csv",sep=";",header=TRUE,row.names=1)
taxmat<-as.matrix(taxmat)
OTU = otu_table(comm, taxa_are_rows = TRUE)
TAX = tax_table(taxmat)
sample_data = sample_data(metadata)
#Importing phylogeny
phy<-read.tree("nmt3_sequences_mpr.tree")
#Rooting the tree using midpoint rooting
phy<-midpoint.root(phy)
physeq.ini = merge_phyloseq(OTU,sample_data,TAX,phy)

#2 - We merge samples belonging to the same Life Stage category
# Transform all variables to factors for merging
df <- as.data.frame(lapply(sample_data(physeq.ini),function (y) if(class(y)!="factor" ) as.factor(y) else y),stringsAsFactors=T)
row.names(df) <- sample_names(physeq.ini)
sample_data(physeq.ini) <- sample_data(df)

physeq.ini.m1 = merge_samples(physeq.ini, "LifeStage")

###########################
#3 Data source :          #
# rarefied abundance      #
###########################
venna<-otu_table(physeq.ini.m1)
venna<-t(venna)
setwd(output_dir)
write.table(venna,"xpdev_merged_otu_abundance_a.tab")

#####################################
#4 Data source :                    #
# rarefied and filtered abundance   #
#  abundance > 1% in                #
# at least 10% of samples           #
#####################################

physeq.ini.Rm = transform_sample_counts(physeq.ini, function(x) 100 * x/sum(x))
physeq.ini.Rfm = filter_taxa(physeq.ini.Rm,function(x) sum(x>1)  > (0.1*length(x)),TRUE)
# Transform all variables to factors for merging
df <- as.data.frame(lapply(sample_data(physeq.ini.Rfm),function (y) if(class(y)!="factor" ) as.factor(y) else y),stringsAsFactors=T)
row.names(df) <- sample_names(physeq.ini.Rfm)
sample_data(physeq.ini.Rfm) <- sample_data(df)

physeq.ini.m2 = merge_samples(physeq.ini.Rfm, "LifeStage")
vennb<-otu_table(physeq.ini.m2)
vennb<-t(vennb)
write.table(vennb,"xpdev_merged_otu_abundance_b.tab")

##################################
#5 Data source                   #
#  raw abundance counts          #
##################################
setwd(data_dir)
#Importing abundance data
comm<-read.table("nmt3_abondance_tpn_notax_XP_Dev.asc",header=TRUE,row.names=1)
# Getting rid of OTUs with zero abundance
comm<-comm %>% select_if((function(col) is.numeric(col) && sum(col) > 0))

comm<-t(comm)
#Importing  metadata
metadata<-read.csv("nmt3_2_metadata_xp_dev.csv",sep=";",header=TRUE,row.names=1)
taxmat<-read.csv("nmt3_Gal18_abondance_tax.csv",sep=";",header=TRUE,row.names=1)
taxmat<-as.matrix(taxmat)
OTU = otu_table(comm, taxa_are_rows = TRUE)
TAX = tax_table(taxmat)
sample_data = sample_data(metadata)
#Importing phylogeny
phy<-read.tree("nmt3_sequences_mpr.tree")
#Rooting the tree using midpoint rooting
phy<-midpoint.root(phy)
physeq.ini = merge_phyloseq(OTU,sample_data,TAX,phy)
#2 - We merge samples corresponding to the same Developmental Stage category
# Transform all variables to factors for merging
df <- as.data.frame(lapply(sample_data(physeq.ini),function (y) if(class(y)!="factor" ) as.factor(y) else y),stringsAsFactors=T)
row.names(df) <- sample_names(physeq.ini)
sample_data(physeq.ini) <- sample_data(df)

physeq.ini.m3 = merge_samples(physeq.ini, "LifeStage")
vennc<-otu_table(physeq.ini.m3)
vennc<-t(vennc)
setwd(output_dir)
write.table(vennc,"xpdev_merged_otu_abundance_c.tab")