本节我们来绘制微生物的物种组成图;先通过phyloseq包整合数据,之后利用MicrobiotaProcess包绘制门水平物种组成图,最后通过ggplot2自定义绘制物种组成图,喜欢的小伙伴可以关注我的公众号R语言数据分析指南,后台回复关键词物种组成图获取全部数据及代码。
加载必须R包
rm(list=ls())
library(pacman)
library(magrittr)
library(reshape2)
pacman::p_load(tidyverse,phyloseq,MicrobiotaProcess,ape)
整合数据
otu_mat <- read.delim2("otu_table.tsv",header=T,
sep="\t",check.names = F,row.names = 1) %>%
as.matrix()
tax_mat <- read.delim("taxa.xls",header=T,row.names = 1,
sep="\t",check.names = F) %>% as.matrix()
samples_df <- read.delim("group.xls",header = T,row.names = 1,
sep="\t",check.names = F)
tree <- read.tree("rooted_tree.tre")
OTU = otu_table(otu_mat,taxa_are_rows =T)
TAX = tax_table(tax_mat)
samples = sample_data(samples_df)
ps <- phyloseq(OTU,TAX,samples,tree)
ps
> ps
phyloseq-class experiment-level object
otu_table() OTU Table: [ 3761 taxa and 12 samples ]
sample_data() Sample Data: [ 12 samples by 1 sample variables ]
tax_table() Taxonomy Table: [ 3761 taxa by 7 taxonomic ranks ]
phy_tree() Phylogenetic Tree: [ 3761 tips and 3760 internal nodes ]
MicrobiotaProcess包进行数据可视化
phytax <- get_taxadf(obj=ps, taxlevel=2)
phybar <- ggbartax(obj=phytax,facetNames="group", count=FALSE) +
xlab(NULL) + ylab("relative abundance (%)")+
theme(axis.text.x=element_text(face="plain",
color="black",hjust=0.8,vjust=0.6,
size=9, angle=90))+
theme(strip.text.x = element_text(size=8, color="black",
face="plain"))+
theme(legend.position="right")
phybar
ggplot2自定义绘图
定义颜色
colors <-c("#E41A1C","#1E90FF","#FF8C00","#4DAF4A","#984EA3",
"#40E0D0","#FFC0CB","#00BFFF","#FFDEAD","#90EE90",
"#EE82EE","#00FFFF","#F0A3FF", "#0075DC",
"#993F00","#4C005C","#2BCE48","#FFCC99",
"#808080","#94FFB5","#8F7C00","#9DCC00",
"#C20088","#003380","#FFA405","#FFA8BB",
"#426600","#FF0010","#5EF1F2","#00998F",
"#740AFF","#990000","#FFFF00")
导出门水平物种数据
p <- phyloseq::otu_table(phytax) %>% as.data.frame()
write.table (p,file ="phylumt.xls", sep ="\t",col.names = NA)
将丰度小于1%的归类于others
computed_persent <- function(path) {
data <- path %>%
read.delim(check.names = FALSE, row.names = 1)
data2 <- data %>%
mutate(sum = rowSums(.), persent = sum / sum(sum) * 100,
sum = NULL,) %>%
rbind(filter(., persent < 1) %>% colSums()) %>%
mutate(OTU_ID = c(data %>% rownames(), "others"))
filter(data2[1:(nrow(data2) - 1),], persent > 1) %>%
rbind(data2[nrow(data2),]) %>%
select(ncol(.), 1:(ncol(.) - 2)) %>%
set_rownames(seq_len(nrow(.))) %>%
return()
}
path <- "phylumt.xls"
数据整合
a1 <- computed_persent(path) %>% melt()
a2 <- "group.xls" %>% read.delim()
a4 <- NULL
for (i in seq_len(nrow(a1))) {
a4[i] <- a2[which(a2[, 1] == a1[i, 2]), 2] }
a1[, 4] <- a4
a1
数据可视化
ggplot(a1,aes(variable,value,fill=OTU_ID))+
geom_bar(stat="identity",position = "fill")+
facet_grid(. ~ V4,scales = "free",space="free_x")+
labs(x="",y="Proportions")+
scale_fill_manual(values = colors)+labs(fill="")+
theme(legend.title=element_blank())+
scale_y_continuous(expand=c(0,0))+theme_bw()