后面我们会陆陆续续有个系列,跟着别人好的paper中的图进行学习,试着重复别人图中的效果,来加强自己的学习。
今天就来测试和学习下面这个paper中的一个堆积柱状图,主要是分层注释的练习。
因为,没有paper中的数据,我们做测试就随机生成一点数据。
先分解整个图:
1. 堆积柱状图
2. 第1、2层注释:上面一层注释每3个样本一组,代表取样部位;下面一层注释代表取样时期。
3. 第3层注释,按样本的宏基因组来源分类。
4. 第4层,3个文本信息
所以先生成一个测试数据,来表征柱状图,从图中看,包含21组样本,每个样本里面5种不同的变量。
library(reshape2)
library(ggnewscale)
library(ggplot2)
library(tidyverse)
# Simulate the data behind the figure: 21 samples (rows) x 5 ecological
# processes (columns), with the same process names as in the paper.
df <- as.data.frame(matrix(
  data = sample(0:50, 105, replace = TRUE),
  ncol = 5
))
colnames(df) <- c("Heterogeneous selection", "Homogeneous selection",
                  "Dispersal limitation", "Homogenizing dispersal",
                  "Undominated")
# The figure shows proportions, so rescale every row to sum to 1.
# rowSums(df) has one entry per row, so the division is applied row-wise
# in a single vectorized step instead of growing a data frame in a loop.
# (A row whose counts are all zero would yield NaN, as any 0 / 0 does.)
back.data <- df / rowSums(df)
# Keep the sample number as an explicit column so it survives reshaping.
back.data$ID <- rownames(back.data)
# Reshape from wide to long format: one row per (sample, process) pair.
df_plot <- melt(back.data, id.vars = "ID")
# Order the bars by sample number. The levels come from the samples that
# actually exist rather than from a hard-coded count.
df_plot$ID <- factor(df_plot$ID, levels = back.data$ID)
# Fill colours for the five ecological processes, in column order.
col <- c("purple", "green", "gold", "red", "gray")
# Basic framework first: a filled (100%) stacked bar chart, one bar per sample.
p <- ggplot(df_plot) +
  geom_col(aes(x = ID, y = value, fill = variable),
           position = position_fill()) +
  scale_fill_manual(values = col, name = "Ecological process") +
  xlab(NULL) +
  ylab("Relative Importance") +
  scale_y_continuous(labels = scales::percent_format()) +
  # Hide the x-axis text and ticks.
  theme(axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        text = element_text(size = 15))
p
下面,我们就要开始添加注释信息,信息我们以矩阵框的形式来添加,前面的散点图和曼哈顿图的例子我们也介绍过这种用法:
geom_rect()和geom_tile()函数的功能是一致的,但是参数有所区别:geom_rect()使用的是矩形四条边界的位置,即xmin、xmax、ymin和ymax,而geom_tile()使用的是矩形的中心位置及其尺寸,即x、y、width、height。geom_raster()是geom_tile()的特例,其要求所有矩形的尺寸相同。
我们今天使用geom_rect来实现。
第一层的注释信息,应该包含下面的样本:
我们先生成一个矩形框的注释文件,文件包含注释框的ID,以及开始和结束位置:
# First annotation layer: one box per sampling niche, each covering three
# consecutive bars. xmin/xmax are the positions of the first and last bar.
annotation1 <- data.frame(
  ID = c("Plastic leaf", "Phylloplane", "Leaf endosphere", "Rhizoplane",
         "Root endosphere", "Rhizosphere soil", "Bulk soil"),
  xmin = seq(1, 19, by = 3),
  xmax = seq(3, 21, by = 3)
)
# Fix the level order to the order given above instead of alphabetical.
annotation1$ID <- factor(annotation1$ID, levels = annotation1$ID)
# Add the niche annotation as rectangles below the bars (y in [-0.06, -0.02]).
ggplot(df_plot) +
  geom_col(aes(x = ID, y = value, fill = variable),
           position = position_fill()) +
  scale_fill_manual(values = col, name = "Ecological process") +
  xlab(NULL) +
  ylab("Relative Importance") +
  scale_y_continuous(labels = scales::percent_format()) +
  theme(axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        text = element_text(size = 15)) +
  # Start a second, independent fill scale for the niche strip.
  new_scale("fill") +
  geom_rect(data = annotation1,
            aes(xmin = xmin, xmax = xmax, ymin = -0.06, ymax = -0.02,
                fill = ID)) +
  scale_fill_manual(values = c("#FA26A0", "#388E3C", "#77ACF1", "#FFC947",
                               "#B85C38", "#AAAAAA", "#364547"),
                    name = "Niche")
从图中可以发现,注释框的起始和结束位置落在了柱子的中心,所以我们需要在中心位置的基础上往前、往后各扩展半个柱宽(0.5);但是因为不同组之间需要留出缝隙,所以我们只扩展0.45的距离。
# Niche boxes again, now widened by 0.45 on each side of the first/last bar
# position. 0.5 would make neighbouring groups touch, so 0.45 is used to
# leave a small gap between groups.
annotation1 <- data.frame(
  ID = c("Plastic leaf", "Phylloplane", "Leaf endosphere", "Rhizoplane",
         "Root endosphere", "Rhizosphere soil", "Bulk soil"),
  xmin = seq(1, 19, by = 3) - 0.45,
  xmax = seq(3, 21, by = 3) + 0.45
)
# Fix the level order to the order given above instead of alphabetical.
annotation1$ID <- factor(annotation1$ID, levels = annotation1$ID)
# Stacked bars plus the niche strip drawn from annotation1.
ggplot(df_plot) +
  geom_col(aes(x = ID, y = value, fill = variable),
           position = position_fill()) +
  scale_fill_manual(values = col, name = "Ecological process") +
  xlab(NULL) +
  ylab("Relative Importance") +
  scale_y_continuous(labels = scales::percent_format()) +
  theme(axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        text = element_text(size = 15)) +
  # Start a second, independent fill scale for the niche strip.
  new_scale("fill") +
  geom_rect(data = annotation1,
            aes(xmin = xmin, xmax = xmax, ymin = -0.06, ymax = -0.02,
                fill = ID)) +
  scale_fill_manual(values = c("#FA26A0", "#388E3C", "#77ACF1", "#FFC947",
                               "#B85C38", "#AAAAAA", "#364547"),
                    name = "Niche")
下面添加第二层的注释信息,第二层相当于有21个注释(7个取样部位 × 3个生育期),颜色按生育期来着色。所以,我们先添加21个大小一样的框,颜色按生育期着色,组之间的位置我们再调整。
# Second annotation layer: one unit-wide box per bar (21 in total), centred
# on the bar position and labelled with the growth stage, which cycles
# Seedling -> Tasseling -> Mature seven times.
annotation2 <- data.frame(
  ID = factor(rep(1:3, times = 7),
              levels = 1:3,
              labels = c("Seedling stage", "Tasseling stage", "Mature stage")),
  xmin = 1:21 - 0.5,
  xmax = 1:21 + 0.5
)
# Stacked bars, the niche strip, and a second strip coloured by growth stage.
ggplot(df_plot) +
  geom_col(aes(x = ID, y = value, fill = variable),
           position = position_fill()) +
  scale_fill_manual(values = col, name = "Ecological process") +
  xlab(NULL) +
  ylab("Relative Importance") +
  scale_y_continuous(labels = scales::percent_format()) +
  theme(axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        text = element_text(size = 15)) +
  # Independent fill scale for the niche strip.
  new_scale("fill") +
  geom_rect(data = annotation1,
            aes(xmin = xmin, xmax = xmax, ymin = -0.06, ymax = -0.02,
                fill = ID)) +
  scale_fill_manual(values = c("#FA26A0", "#388E3C", "#77ACF1", "#FFC947",
                               "#B85C38", "#AAAAAA", "#364547"),
                    name = "Niche") +
  # Another independent fill scale for the stage strip, drawn just below.
  new_scale("fill") +
  geom_rect(data = annotation2,
            aes(xmin = xmin, xmax = xmax, ymin = -0.11, ymax = -0.07,
                fill = ID)) +
  scale_fill_brewer(palette = "Set1", name = "Stage")
但是,因为有7个组,所以组和组之间是需要留点空隙的。也就是说,在3-4、6-7、9-10、12-13、15-16、18-19这些组与组的交界处,以及最左端(1)和最右端(21),注释框都需要向内缩进0.05。
这样来说,xmin需要加上0.05的有1,4,7,10,13,16,19(每组的第一个框);而xmax需要减去0.05的有3,6,9,12,15,18,21(每组的最后一个框)。
# Rebuild the per-bar stage boxes, then pull the outer edges of every group
# of three inwards by 0.05 so that neighbouring groups do not touch.
annotation2 <- data.frame(
  ID = factor(rep(1:3, times = 7),
              levels = 1:3,
              labels = c("Seedling stage", "Tasseling stage", "Mature stage")),
  xmin = seq(0.5, 20.5, by = 1),
  xmax = seq(1.5, 21.5, by = 1)
)
# Rows holding the first box of each group: shift their left edge right.
index1 <- seq(1, 19, by = 3)
annotation2$xmin <- replace(annotation2$xmin, index1,
                            annotation2$xmin[index1] + 0.05)
# Rows holding the last box of each group: shift their right edge left.
index2 <- seq(3, 21, by = 3)
annotation2$xmax <- replace(annotation2$xmax, index2,
                            annotation2$xmax[index2] - 0.05)
这样操作之后,获得我们新的第二层注释文件。
# Stacked bars with the niche strip and the stage strip from annotation2.
ggplot(df_plot) +
  geom_col(aes(x = ID, y = value, fill = variable),
           position = position_fill()) +
  scale_fill_manual(values = col, name = "Ecological process") +
  xlab(NULL) +
  ylab("Relative Importance") +
  scale_y_continuous(labels = scales::percent_format()) +
  theme(axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        text = element_text(size = 15)) +
  # Independent fill scale for the niche strip.
  new_scale("fill") +
  geom_rect(data = annotation1,
            aes(xmin = xmin, xmax = xmax, ymin = -0.06, ymax = -0.02,
                fill = ID)) +
  scale_fill_manual(values = c("#FA26A0", "#388E3C", "#77ACF1", "#FFC947",
                               "#B85C38", "#AAAAAA", "#364547"),
                    name = "Niche") +
  # Independent fill scale for the stage strip.
  new_scale("fill") +
  geom_rect(data = annotation2,
            aes(xmin = xmin, xmax = xmax, ymin = -0.11, ymax = -0.07,
                fill = ID)) +
  scale_fill_brewer(palette = "Set1", name = "Stage")
这样画出来的图,在第二层注释之间就会有间隙了。
第三层比较简单,就是三组。
# Third annotation layer: three bars (Air, Plant, Soil), each running from
# 0.45 before its first bar position to 0.45 after its last one.
annotation3 <- local({
  first_bar <- c(Air = 1, Plant = 4, Soil = 16)
  last_bar <- c(3, 15, 21)
  data.frame(
    ID = names(first_bar),
    xmin = unname(first_bar) - 0.45,
    xmax = last_bar + 0.45
  )
})
# Stacked bars, niche strip, stage strip, and the thin black source bars.
ggplot(df_plot) +
  geom_col(aes(x = ID, y = value, fill = variable),
           position = position_fill()) +
  scale_fill_manual(values = col, name = "Ecological process") +
  xlab(NULL) +
  ylab("Relative Importance") +
  scale_y_continuous(labels = scales::percent_format()) +
  theme(axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        text = element_text(size = 15)) +
  # Independent fill scale for the niche strip.
  new_scale("fill") +
  geom_rect(data = annotation1,
            aes(xmin = xmin, xmax = xmax, ymin = -0.06, ymax = -0.02,
                fill = ID)) +
  scale_fill_manual(values = c("#FA26A0", "#388E3C", "#77ACF1", "#FFC947",
                               "#B85C38", "#AAAAAA", "#364547"),
                    name = "Niche") +
  # Independent fill scale for the stage strip.
  new_scale("fill") +
  geom_rect(data = annotation2,
            aes(xmin = xmin, xmax = xmax, ymin = -0.11, ymax = -0.07,
                fill = ID)) +
  scale_fill_brewer(palette = "Set1", name = "Stage") +
  new_scale("fill") +
  # The source bars use a constant black fill and are kept out of the legend.
  geom_rect(data = annotation3,
            aes(xmin = xmin, xmax = xmax, ymin = -0.13, ymax = -0.12),
            fill = "black", show.legend = FALSE)
第4层就是三个文本信息,位置其实就是第3层的中间位置。
所以基本就是x轴的2,9.5,18.5
所以我们添加文本信息:
# Full figure: stacked bars, three annotation layers and the text labels.
ggplot(df_plot) +
  geom_col(aes(x = ID, y = value, fill = variable),
           position = position_fill()) +
  scale_fill_manual(values = col, name = "Ecological process") +
  xlab(NULL) +
  ylab("Relative Importance") +
  scale_y_continuous(labels = scales::percent_format()) +
  theme(axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        text = element_text(size = 15)) +
  # Independent fill scale for the niche strip.
  new_scale("fill") +
  geom_rect(data = annotation1,
            aes(xmin = xmin, xmax = xmax, ymin = -0.06, ymax = -0.02,
                fill = ID)) +
  scale_fill_manual(values = c("#FA26A0", "#388E3C", "#77ACF1", "#FFC947",
                               "#B85C38", "#AAAAAA", "#364547"),
                    name = "Niche") +
  # Independent fill scale for the stage strip.
  new_scale("fill") +
  geom_rect(data = annotation2,
            aes(xmin = xmin, xmax = xmax, ymin = -0.11, ymax = -0.07,
                fill = ID)) +
  scale_fill_brewer(palette = "Set1", name = "Stage") +
  new_scale("fill") +
  # The source bars use a constant black fill and are kept out of the legend.
  geom_rect(data = annotation3,
            aes(xmin = xmin, xmax = xmax, ymin = -0.13, ymax = -0.12),
            fill = "black", show.legend = FALSE) +
  # annotate() draws each label exactly once. geom_text() with constant
  # aesthetics would inherit df_plot and redraw every label once per data
  # row, stacking many copies on top of each other.
  annotate("text", x = c(2, 9.5, 18.5), y = -0.18,
           label = c("Air", "Plant", "Soil"), size = 8)
下面,我们来调整一下legend的位置,用cowplot::plot_grid来实现。
# Main panel: the full figure, keeping only the "Ecological process" legend.
# The legends of the niche and stage strips are switched off here.
main_plot <- ggplot(df_plot) +
  geom_col(aes(x = ID, y = value, fill = variable),
           position = position_fill()) +
  scale_fill_manual(values = col, name = "Ecological process") +
  xlab(NULL) +
  ylab("Relative Importance") +
  scale_y_continuous(labels = scales::percent_format()) +
  theme(axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        text = element_text(size = 15)) +
  # Independent fill scale for the niche strip.
  new_scale("fill") +
  geom_rect(data = annotation1,
            aes(xmin = xmin, xmax = xmax, ymin = -0.06, ymax = -0.02,
                fill = ID),
            show.legend = FALSE) +
  scale_fill_manual(values = c("#FA26A0", "#388E3C", "#77ACF1", "#FFC947",
                               "#B85C38", "#AAAAAA", "#364547"),
                    name = "Niche") +
  # Independent fill scale for the stage strip.
  new_scale("fill") +
  geom_rect(data = annotation2,
            aes(xmin = xmin, xmax = xmax, ymin = -0.11, ymax = -0.07,
                fill = ID),
            show.legend = FALSE) +
  scale_fill_brewer(palette = "Set1", name = "Stage") +
  new_scale("fill") +
  geom_rect(data = annotation3,
            aes(xmin = xmin, xmax = xmax, ymin = -0.13, ymax = -0.12),
            fill = "black", show.legend = FALSE) +
  # annotate() draws each label exactly once. geom_text() with constant
  # aesthetics would inherit df_plot and redraw every label once per data
  # row, stacking many copies on top of each other.
  annotate("text", x = c(2, 9.5, 18.5), y = -0.18,
           label = c("Air", "Plant", "Soil"), size = 8)
# Legend donor: the same figure with the "Ecological process" legend turned
# off and the niche/stage legends left on, laid out horizontally at the
# bottom.
lg1 <- ggplot(df_plot) +
  geom_col(aes(x = ID, y = value, fill = variable),
           position = position_fill(), show.legend = FALSE) +
  scale_fill_manual(values = col, name = "Ecological process") +
  xlab(NULL) +
  ylab("Relative Importance") +
  scale_y_continuous(labels = scales::percent_format()) +
  theme(axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        text = element_text(size = 15)) +
  # Independent fill scale for the niche strip.
  new_scale("fill") +
  geom_rect(data = annotation1,
            aes(xmin = xmin, xmax = xmax, ymin = -0.06, ymax = -0.02,
                fill = ID)) +
  scale_fill_manual(values = c("#FA26A0", "#388E3C", "#77ACF1", "#FFC947",
                               "#B85C38", "#AAAAAA", "#364547"),
                    name = "Niche") +
  # Independent fill scale for the stage strip.
  new_scale("fill") +
  geom_rect(data = annotation2,
            aes(xmin = xmin, xmax = xmax, ymin = -0.11, ymax = -0.07,
                fill = ID)) +
  scale_fill_brewer(palette = "Set1", name = "Stage") +
  new_scale("fill") +
  geom_rect(data = annotation3,
            aes(xmin = xmin, xmax = xmax, ymin = -0.13, ymax = -0.12),
            fill = "black", show.legend = FALSE) +
  # annotate() draws each label exactly once. geom_text() with constant
  # aesthetics would inherit df_plot and redraw every label once per data
  # row, stacking many copies on top of each other.
  annotate("text", x = c(2, 9.5, 18.5), y = -0.18,
           label = c("Air", "Plant", "Soil"), size = 8) +
  theme(legend.direction = "horizontal", legend.position = "bottom")
# Extract the legend (sampling niche and growth stage keys) from lg1.
niche_legend <- cowplot::get_legend(lg1)
# Stack the main panel above the extracted legend in one column, with a
# 5:1 height ratio.
cowplot::plot_grid(
  main_plot, niche_legend,
  ncol = 1,
  nrow = 2,
  rel_heights = c(5, 1)
)
多个图的话,可以一起合。