前面一系列可视化教程,都是通过将数据整合成phyloseq
对象再运用MicrobiotaProcess
包已经封装好的函数进行可视化操作,虽然简洁好用,但是灵活性不足,现在让我们根据已有数据来一套组合拳
1.Hcluster_bar
phylum.xls,otu_table.xls文件可根据之前介绍步骤导出
1.1 自定义颜色
rm(list = ls())
pacman::p_load(tidyverse,reshape,ggtree,aplot,RColorBrewer)
colors <-c("#E41A1C","#1E90FF","#FF8C00","#4DAF4A","#984EA3",
"#40E0D0","#FFC0CB","#00BFFF","#FFDEAD","#90EE90",
"#EE82EE","#00FFFF","#F0A3FF", "#0075DC","#993F00",
"#4C005C","#2BCE48","#FFCC99","#808080","#94FFB5",
"#8F7C00","#9DCC00","#C20088","#003380","#FFA405",
"#FFA8BB","#426600","#FF0010","#5EF1F2","#00998F",
"#740AFF","#990000","#FFFF00")
1.2 读入数据绘制行聚类树
p <- read.delim("otu_table.xls",header = T,
sep="\t",check.names = F,row.names = 1) %>% t()
tree <- hclust(dist(p)) %>%
ggtree(layout="rectangular", branch.length="none")
1.3 绘制水平柱状图
bar <- read.delim("phylum.xls",header = T,
sep="\t",check.names = F) %>%
melt() %>% ggplot(aes(variable ,value,fill=OTU))+
geom_bar(stat="identity")+labs(x="",y="")+
theme_bw()+
theme(axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank())+
scale_fill_manual(values = colors)+labs(fill="taxon")+
scale_y_continuous(expand=c(0,0))+coord_flip()
1.4 aplot包将聚类树与柱状图拼接
bar%>%insert_left(tree,width=.3)
适合大数据的聚类圈图
rm(list = ls())
pacman::p_load(tidyverse,reshape,ggtree,ggtreeExtra,ggnewscale)
col <- c(34, 51, 142, 26, 31, 371,
36, 7, 12, 30, 84, 88, 116,
121, 77, 56, 386, 373, 423,
435, 438, 471, 512, 130, 52,
47, 6, 11, 43, 54, 367, 382,
422, 4, 8, 375, 124, 448, 419,
614, 401, 403, 613, 583, 652,
628, 633, 496, 638, 655, 132, 503, 24)
mycolors <- c(col,colors()[rep(col,5)])
p <- read.tree("otu_hc.tre") %>%
ggtree(branch.length = "none", layout = "fan", size = 0.3)
dat <- read.delim("phylum.xls") %>% melt()
p2 <- p +
new_scale_fill() +
geom_fruit(data = dat,
mapping = aes(x = value, fill = OTU.ID, y = variable),
geom = geom_bar,orientation = "y",stat = "identity") +
geom_tiplab(offset = 7, size = 1)+
scale_fill_manual(values = mycolors)
p2
2. 物种组成柱状图
绘制之前需要先对属水平数据进行过滤,可根据https://www.jianshu.com/p/e1dde3571d16中的代码对数据进行过滤
2.1 数据过滤
rm(list=ls())
colors <-c("#E41A1C","#1E90FF","#FF8C00","#4DAF4A","#984EA3",
"#40E0D0","#FFC0CB","#00BFFF","#FFDEAD","#90EE90",
"#EE82EE","#00FFFF","#F0A3FF", "#0075DC",
"#993F00","#4C005C","#2BCE48","#FFCC99",
"#808080","#94FFB5","#8F7C00","#9DCC00",
"#C20088","#003380","#FFA405","#FFA8BB",
"#426600","#FF0010","#5EF1F2","#00998F",
"#740AFF","#990000","#FFFF00")
pacman::p_load(tidyverse,magrittr,reshape,RColorBrewer,ggtree)
computed_persent <- function(path) {
data <- path %>%
read.delim(check.names = FALSE, row.names = 1)
data2 <- data %>%
mutate(sum = rowSums(.), persent = sum / sum(sum) * 100, sum = NULL,) %>%
rbind(filter(., persent < 1) %>% colSums()) %>%
mutate(OTU= c(data %>% rownames(),"others"))
filter(data2[1:(nrow(data2) - 1),], persent > 1) %>%
rbind(data2[nrow(data2),]) %>%
select(ncol(.), 1:(ncol(.) - 2)) %>%
set_rownames(seq_len(nrow(.))) %>%
return()
}
path <- "genus.xls"
2.2 整合分组文件
a1 <- computed_persent(path) %>% melt()
a2 <- "group.txt" %>% read.delim()
a4 <- NULL
for (i in seq_len(nrow(a1))) {
a4[i] <- a2[which(a2[, 1] == a1[i, 2]), 2] }
a1[, 4] <- a4
a1
2.3 ggplot2可视化
ggplot(a1,aes(variable,value,fill=OTU))+
geom_bar(stat="identity",position = "fill")+
facet_grid(. ~ V4,scales = "free",space="free_x")+
labs(x="",y="Proportions")+
scale_fill_manual(values = colors)+labs(fill="")+
theme(legend.title=element_blank())+
scale_y_continuous(expand=c(0,0))+theme_bw()
感谢身边的大佬朋友提供代码,没有他的支援某些步骤没法这么顺利进行
未完待续......