# Message and warning turned off for rendering
# Analyse par phyloseq
library(phyloseq)
library(ggplot2)
library(phytools)
library(dplyr)
library(gridExtra)
library(usedist)
library(PMCMRplus)
library(multcompView)
library(picante)
# Warning turned off for rendering
## Folders, Themes, colors
source("prelude.R")
# Setting up directories
data_dir <-paste0(data_dir_path,"xptransmission")
output_dir <- paste0(output_dir_path,"Figure6_transmission")
# Importing data
# Defining the directory containing the data to import
setwd(data_dir)
#######################
# Ordination plots #
#######################
# Working on abundance data from 100 rarefactions
comm<-read.table("xptrans_fichier_allcom.txt",header=TRUE,row.names=1)
# Getting rid of OTUs with zero abundance
comm<-comm %>% select_if((function(col) is.numeric(col) && sum(col) > 0))
comm<-t(comm)
#Importing metadata
metadata<-read.csv("nmt3_metadata_XP_transa.csv",sep=";",header=TRUE,row.names=1)
taxmat<-read.csv("nmt3_Gal18_abondance_tax.csv",sep=";",header=TRUE,row.names=1)
taxmat<-as.matrix(taxmat)
OTU<-otu_table(comm, taxa_are_rows = TRUE)
TAX<-tax_table(taxmat)
sample_data<-sample_data(metadata)
#Importing phylogeny
phy<-read.tree("nmt3_sequences_mpr.tree")
#Rooting the tree using midpoint rooting
phy<-midpoint.root(phy)
physeq.ini = merge_phyloseq(OTU,sample_data,TAX,phy)
##Setting the working directory for output files
setwd(output_dir)
# We keep OTUs seen more than 5 times in 30% of samples
MTT <- filter_taxa(physeq.ini, function(x) sum(x > 5) > (0.3*length(x)), TRUE)
# How many OTUs in our starting data ?
length(row.names(otu_table(physeq.ini)))
## [1] 648
# How many OTUs have been filtered ?
length(row.names(otu_table(MTT)))
## [1] 382
# Il y a 382 OTUs sur 648 qui passent le filtre d'abondance et de prévalence.
# We convert into relative abundance and filter OTUs with abundance of more than 0.001%
# This filtering step should not further reduce the number of OTUs
count2proportion <- function(x){
x[(x / sum(x)) < (1e-5)] <- 0
return(x)
}
MTT<-transform_sample_counts(MTT,fun=count2proportion)
ordu<-ordinate(MTT, "PCoA", "unifrac", weighted=TRUE)
pw<-plot_ordination(MTT, ordu, shape="XP_Tissue")
plotw<-pw+geom_point(size=3,stroke=2)+scale_shape_manual(values=c(21,22,25),guide_legend(title=""))
plotw<-plotw+ggtitle("Community structure")+theme_npgray()+theme(legend.position="top")+scale_color_grey(guide_legend(title=""))+ stat_ellipse(type="norm",linetype=2)
plotw
ggsave("MDSPCOA_weighted_UNIFRAC.pdf",width=8,height=6)
#Removing the legend for the next plot with grid arrange
plotwf<-plotw+guides(shape=FALSE)
ordub<-ordinate(MTT, "PCoA", "unifrac", weighted=FALSE)
puw<-plot_ordination(MTT, ordub, shape="XP_Tissue")
plotuw<-puw+geom_point(size=3,stroke=2)+scale_shape_manual(values=c(21,22,25),guide_legend(title=""))
plotuw<-plotuw+ggtitle("Community membership")+scale_color_grey(guide_legend(title=""))+theme_npgray()+theme(legend.position="top")+ stat_ellipse(type="norm",linetype=2)
plotuw
ggsave("MDSPCOA_unweighted_top_UNIFRAC.pdf",width=9,height=7)
#Removing the legend for the next plot with grid arrange
plotuwf<-plotuw+guides(shape=FALSE)
grid.arrange(plotuwf, plotwf, ncol=2)
# Saving plot as a pdf file
#pdf("plotMDSPCOA_wuw_grey.pdf",width=12,height=6)
#print(grid.arrange(plotuwf, plotwf, ncol=2))
#dev.off()
###########################################
# Comparing distances in ordination plots #
###########################################
#Loading the required function from phyloseq_extended
source("https://raw.githubusercontent.com/mahendra-mariadassou/phyloseq-extended/master/R/graphical_methods.R")
## Loading required package: scales
## Loading required package: reshape2
p1 <- plot_samples(MTT, ordinate(MTT, "MDS", "cc"), color = "XP_Tissue") + ggtitle("MDS + Jaccard") +guides(color=FALSE)
p2 <- plot_samples(MTT, ordinate(MTT, "MDS", "unifrac"), color = "XP_Tissue") + ggtitle("MDS + UniFrac") +guides(color=FALSE)
p3 <- plot_samples(MTT, ordinate(MTT, "MDS", "bray"), color = "XP_Tissue") + ggtitle("MDS + Bray-Curtis") +guides(color=FALSE)
p4 <- plot_samples(MTT, ordinate(MTT, "MDS", "wunifrac"), color = "XP_Tissue") + ggtitle("MDS + weighted UniFrac") +guides(color=FALSE)
grid.arrange(p1, p2, p3, p4, ncol = 2, nrow = 2)
#### Boxplot of Unifrac and Weighted Unifrac distances
#Unifrac distances grouped by tissues
unifrac<-distance(physeq.ini,method="unifrac")
df_unifrac_XP_tissue<-dist_groups(unifrac,metadata$XP_Tissue)
# Significance of unweighted unifrac distances grouped by tissues
kw_unifrac_XP_tissue<-kruskal.test(df_unifrac_XP_tissue$Distance, df_unifrac_XP_tissue$Label,data=df_unifrac_XP_tissue)
pp_uni<-kwAllPairsDunnTest(x=df_unifrac_XP_tissue$Distance, df_unifrac_XP_tissue$Label,p.adjust.method="bonferroni")
# Making letters matching the significance level
mymat_uni<-tri.to.squ(pp_uni$p.value)
myletters_uni<- multcompLetters(mymat_uni,compare="<=",threshold=0.05,Letters=letters)
# Store the letters in a dataframe
myletters_uni_df <- data.frame(Sample=names(myletters_uni$Letters),letter = myletters_uni$Letters )
#Boxplot with points
ggplot(df_unifrac_XP_tissue,aes(x=Label,y=Distance))+geom_jitter(width=0.2,height=0.01,size=3,colour="darkgrey",alpha=0.9)+geom_boxplot(outlier.colour=NA,alpha=0.1)+geom_text(data = myletters_uni_df, aes(label = letter, x=Sample,y = 0.81 ),size=6,fontface="bold")+scale_x_discrete(limits=c("Between Eggs and Feces","Between Eggs and Skin","Between Feces and Skin","Within Eggs","Within Feces","Within Skin"),labels=c("Eggs-Feces","Eggs-Skin","Feces-Skin","Eggs","Skin","Feces"))+ylab("Unifrac")+xlab("Samples")+theme_npgray()
ggsave("plot_unifrac_distances_samples.pdf",width=12,height=8)
#Boxplot only
ggplot(df_unifrac_XP_tissue,aes(x=Label,y=Distance))+geom_boxplot(outlier.colour=NA,alpha=0.1)+geom_text(data = myletters_uni_df, aes(label = letter, x=Sample,y = 0.81 ),size=6,fontface="bold")+scale_x_discrete(limits=c("Between Eggs and Feces","Between Eggs and Skin","Between Feces and Skin","Within Eggs","Within Feces","Within Skin"),labels=c("Eggs-Feces","Eggs-Skin","Feces-Skin","Eggs","Skin","Feces"))+ylab("Unifrac")+xlab("Samples")+theme_npgray()+guides(fill=FALSE)
#Weighted Unifrac distances grouped by tissues
wunifrac<-distance(physeq.ini,method="wunifrac")
df_wunifrac_XP_tissue<-dist_groups(wunifrac,metadata$XP_Tissue)
# Significance of weighted unifrac distances grouped by tissues
kw_wunifrac_XP_tissue<-kruskal.test(df_wunifrac_XP_tissue$Distance, df_unifrac_XP_tissue$Label,data=df_wunifrac_XP_tissue)
pp_wuni<-kwAllPairsDunnTest(x=df_wunifrac_XP_tissue$Distance, df_wunifrac_XP_tissue$Label,p.adjust.method="bonferroni")
#With points
ggplot(df_wunifrac_XP_tissue,aes(x=Label,y=Distance))+geom_jitter(width=0.2,height=0.01,size=3,colour="darkgrey",alpha=0.9)+geom_boxplot(outlier.colour=NA,alpha=0.1)+scale_x_discrete(limits=c("Between Eggs and Feces","Between Eggs and Skin","Between Feces and Skin","Within Eggs","Within Feces","Within Skin"),labels=c("Eggs-Feces","Eggs-Skin","Feces-Skin","Eggs","Skin","Feces"))+ylab("Weighted Unifrac")+xlab("Samples")+theme_npgray()
ggsave("plot_wunifrac_distances_samples.pdf",width=12,height=8)
#Boxplot only
ggplot(df_wunifrac_XP_tissue,aes(x=Label,y=Distance,fill=Label))+geom_boxplot(outlier.colour=NA,alpha=0.1)+scale_x_discrete(limits=c("Between Eggs and Feces","Between Eggs and Skin","Between Feces and Skin","Within Eggs","Within Feces","Within Skin"),labels=c("Eggs-Feces","Eggs-Skin","Feces-Skin","Eggs","Skin","Feces"))+ylab("Weighted Unifrac")+xlab("Samples")+theme_npgray()+guides(fill=FALSE)
#Centroid distances relative to tissues
df_centroids_unifrac_XP_tissue<-dist_to_centroids(unifrac, metadata$XP_Tissue)
####################################################
# Figure barplot proportion of most abundant phyla #
####################################################
#2 - We fuse taxa at the phylum level
physeq.ini.Rg<-tax_glom(physeq.ini, taxrank="Phylum")
#3 - Transforming abundance counts into proportions
physeq.ini.Rgm = transform_sample_counts(physeq.ini.Rg, function(x) 100 * x/sum(x))
#4 - Filtering OTUs
#We keep OTUs with at least 1% relative abundance in a given sample
physeq.ini.Rfgm = filter_taxa(physeq.ini.Rgm,function(x) mean(x)>1,TRUE)
#5 - We merge samples corresponding to the same Developmental Stage category
# Transform all variables to factors for merging (trick required for R4.0)
df <- as.data.frame(lapply(sample_data(physeq.ini.Rfgm),function (y) if(class(y)!="factor" ) as.factor(y) else y),stringsAsFactors=T)
row.names(df) <- sample_names(physeq.ini.Rfgm)
sample_data(physeq.ini.Rfgm) <- sample_data(df)
#Merging
physeq.ini.Rfgm2 = merge_samples(physeq.ini.Rfgm, "XP_Tissue")
#6 - We recompute the relative abundance because the sample merge sums up the relative abundance of each merged sample
physeq.ini.Rf2gm2 = transform_sample_counts(physeq.ini.Rfgm2, function(x) 100 * x/sum(x))
# We need to recover the good legends corresponding to the category used for merging samples
sample_data(physeq.ini.Rf2gm2)$XP_Tissue <- levels(as.factor(sample_data(physeq.ini.Rg)$XP_Tissue))
#7 - We can go on with the abundance plot itself
# Defining the output dir
setwd(output_dir)
# Plotting
plot_tissue_compo<-plot_bar(physeq.ini.Rf2gm2, "XP_Tissue", fill = "Phylum")+ ylab("Percentage of Sequences")+ scale_fill_manual(values=jcolors)+theme_npgray()+theme(axis.text.x = element_text(angle = 0,hjust=0.5))+scale_x_discrete(limits=c("Eggs","Skin","Feces"), labels=c("Eggs","Skin","Feces"))+xlab("Tissues")
plot_tissue_compo
ggsave("plot_tissue_compo.pdf",width=8,height=6)
#8 Plotting by phylum
p<-plot_bar(physeq.ini.Rf2gm2,facet_grid=~Phylum,fill="Phylum")
p2<-plot_bar(physeq.ini.Rf2gm2, "XP_Tissue", fill = "Phylum")+geom_label(aes(label= scales::number(Abundance, accuracy = 0.1),fill = factor(Phylum)), colour = "white", fontface = "bold")+ ylab("Percentage of Sequences")+ scale_fill_manual(values=jcolors)+theme_npgray()+theme(axis.text.x = element_text(angle = 0,hjust=0.5))+scale_x_discrete(limits=c("Eggs","Skin","Feces"), labels=c("Eggs","Skin","Feces"))+xlab("Tissues")+facet_wrap(~Phylum)
p2
ggsave("plot_XP_Tissue_compophylum.pdf",width=9,height=5)
##################
## Family level ##
##################
#2 - We fuse taxa at the family level
physeq.ini.Rg<-tax_glom(physeq.ini, taxrank="Family")
#3 - Transforming abundance counts into proportions
physeq.ini.Rgm = transform_sample_counts(physeq.ini.Rg, function(x) 100 * x/sum(x))
#4 - Filtering OTUs
#We keep OTUs with at least 1% relative abundance in a given sample
physeq.ini.Rfgm = filter_taxa(physeq.ini.Rgm,function(x) mean(x)>0.5,TRUE)
#5 - We merge samples corresponding to the same Developmental Stage category
# Transform all variables to factors for merging (trick required for R4.0)
df <- as.data.frame(lapply(sample_data(physeq.ini.Rfgm),function (y) if(class(y)!="factor" ) as.factor(y) else y),stringsAsFactors=T)
row.names(df) <- sample_names(physeq.ini.Rfgm)
sample_data(physeq.ini.Rfgm) <- sample_data(df)
#Merging
physeq.ini.Rfgm2 = merge_samples(physeq.ini.Rfgm, "XP_Tissue")
#6 - We recompute the relative abundance because the sample merge sums up the relative abundance of each merged sample
physeq.ini.Rf2gm2 = transform_sample_counts(physeq.ini.Rfgm2, function(x) 100 * x/sum(x))
# We need to recover the good legends corresponding to the category used for merging samples
sample_data(physeq.ini.Rf2gm2)$XP_Tissue <- levels(as.factor(sample_data(physeq.ini.Rg)$XP_Tissue))
#7 - We can go on with the abundance plot itself
plot_bar(physeq.ini.Rf2gm2, "XP_Tissue", fill = "Phylum")+geom_text(aes(label= scales::number(Abundance, accuracy = 0.1)),hjust = -0.2, fontface = "bold")+ylab("Percentage of Sequences")+theme_npgray()+theme(axis.text.x = element_text(angle = 0,hjust=0.5),legend.position="top",strip.text=element_text(face="bold",size=rel(0.7)),strip.background=element_rect(fill="white"))+scale_x_discrete(limits=c("Eggs","Skin","Feces"), labels=c("Eggs","Skin","Feces"))+facet_wrap(~Family,ncol=6)+coord_flip()+xlab("Tissues")
############################
# Plot on selected phyla #
############################
physeq.ini.Rgm = transform_sample_counts(physeq.ini.Rg, function(x) 100 * x/sum(x))
#Bacteroidetes
bacteroidetes<- subset_taxa(physeq.ini.Rgm, Phylum=="Bacteroidetes")
pbacteroidetes<-plot_tree(bacteroidetes, color="XP_Tissue", label.tips="Family", size="abundance", plot.margin=0.3,text.size=4)
pbacteroidetes
#Firmicutes
firmicutes<- subset_taxa(physeq.ini.Rg, Phylum=="Firmicutes")
pfirmicutes<-plot_tree(firmicutes, color="XP_Tissue", label.tips="Family", size="abundance", plot.margin=0.3,text.size=4)
pfirmicutes
#Alphaproteobacteria
Aproteobacteria<- subset_taxa(physeq.ini.Rg, Class=="Alphaproteobacteria")
pAproteobacteria<-plot_tree(Aproteobacteria, color="XP_Tissue", label.tips="Family", size="abundance", plot.margin=0.3,text.size=4)
pAproteobacteria
#Betaproteobacteria
Bproteobacteria<- subset_taxa(physeq.ini.Rg, Class=="Betaproteobacteria")
pBproteobacteria<-plot_tree(Bproteobacteria, color="XP_Tissue", label.tips="Family", size="abundance", plot.margin=0.3,text.size=4)
pBproteobacteria
#Deltaproteobacteria
Dproteobacteria<- subset_taxa(physeq.ini.Rg, Class=="Deltaproteobacteria")
pDproteobacteria<-plot_tree(Dproteobacteria, color="XP_Tissue", label.tips="Family", size="abundance", plot.margin=0.3,text.size=4)
pDproteobacteria
#Gammaproteobacteria
Gproteobacteria<- subset_taxa(physeq.ini.Rg, Class=="Gammaproteobacteria")
pGproteobacteria<-plot_tree(Gproteobacteria, color="XP_Tissue", label.tips="Family", size="abundance", plot.margin=0.3,text.size=4)
pGproteobacteria
##############################################
# Test the significance of unifrac distances #
##############################################
# beta diversity: compute unweighted unifrac distance
uudist<-distance(physeq.ini,method="uunifrac")
adonis(uudist~XP_Tissue,data=metadata)
##
## Call:
## adonis(formula = uudist ~ XP_Tissue, data = metadata)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## XP_Tissue 2 1.9594 0.97969 14.891 0.46693 0.001 ***
## Residuals 34 2.2369 0.06579 0.53307
## Total 36 4.1963 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# beta diversity: compute weighted unifrac distance
uwdist<-distance(physeq.ini,method="wunifrac")
adonis(uwdist~XP_Tissue,data=metadata)
##
## Call:
## adonis(formula = uwdist ~ XP_Tissue, data = metadata)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## XP_Tissue 2 3.5834 1.79169 41.849 0.71112 0.001 ***
## Residuals 34 1.4557 0.04281 0.28888
## Total 36 5.0390 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# unweighted unifrac distances grouped by tissues
kw_unifrac_XP_tissue<-kruskal.test(df_unifrac_XP_tissue$Distance, df_unifrac_XP_tissue$Label,data=df_unifrac_XP_tissue)
kw_unifrac_XP_tissue
##
## Kruskal-Wallis rank sum test
##
## data: df_unifrac_XP_tissue$Distance and df_unifrac_XP_tissue$Label
## Kruskal-Wallis chi-squared = 430.63, df = 5, p-value < 2.2e-16
pp_uni<-kwAllPairsDunnTest(x=df_unifrac_XP_tissue$Distance, df_unifrac_XP_tissue$Label,p.adjust.method="bonferroni")
pp_uni
##
## Pairwise comparisons using Dunn's all-pairs test
## data: df_unifrac_XP_tissue$Distance and df_unifrac_XP_tissue$Label
## Between Eggs and Feces Between Eggs and Skin
## Between Eggs and Skin < 2e-16 -
## Between Feces and Skin 1.000 < 2e-16
## Within Eggs 7.2e-14 1.000
## Within Feces < 2e-16 0.019
## Within Skin < 2e-16 0.474
## Between Feces and Skin Within Eggs Within Feces
## Between Eggs and Skin - - -
## Between Feces and Skin - - -
## Within Eggs 2.3e-15 - -
## Within Feces < 2e-16 1.000 -
## Within Skin < 2e-16 1.000 1.000
##
## P value adjustment method: bonferroni
## alternative hypothesis: two.sided
# weighted unifrac distances grouped by tissues
kw_wunifrac_XP_tissue<-kruskal.test(df_wunifrac_XP_tissue$Distance, df_unifrac_XP_tissue$Label,data=df_wunifrac_XP_tissue)
kw_wunifrac_XP_tissue
##
## Kruskal-Wallis rank sum test
##
## data: df_wunifrac_XP_tissue$Distance and df_unifrac_XP_tissue$Label
## Kruskal-Wallis chi-squared = 447.26, df = 5, p-value < 2.2e-16
pp_wuni<-kwAllPairsDunnTest(x=df_wunifrac_XP_tissue$Distance, df_wunifrac_XP_tissue$Label,p.adjust.method="bonferroni")
pp_wuni
##
## Pairwise comparisons using Dunn's all-pairs test
## data: df_wunifrac_XP_tissue$Distance and df_wunifrac_XP_tissue$Label
## Between Eggs and Feces Between Eggs and Skin
## Between Eggs and Skin <2e-16 -
## Between Feces and Skin 1.000 <2e-16
## Within Eggs <2e-16 1.000
## Within Feces <2e-16 0.297
## Within Skin <2e-16 1.000
## Between Feces and Skin Within Eggs Within Feces
## Between Eggs and Skin - - -
## Between Feces and Skin - - -
## Within Eggs <2e-16 - -
## Within Feces <2e-16 0.068 -
## Within Skin <2e-16 1.000 0.358
##
## P value adjustment method: bonferroni
## alternative hypothesis: two.sided