1.箱线图
盒子内反映了半数的数据分布,因此盒子的高度反映了这些数据的波动状态,盒子越扁表示数据分布比较集中;
伸出去的胡须的上下边缘一般代表了数据的最大值和最小值(也可以设置为1.5倍IQR等),伸出去的胡须越短,也说明数据比较集中。
近似正态分布的箱线图与正态分布的概率密度函数的比较:
- 对于正态分布而言,约有0.7%的数据会落在胡须(1.5倍IQR)范围之外,被判定为异常值;
-
注意区分最大值、最大数、最小值、最小数。
2.基础绘图
ggplot()+geom_boxplot()
# 导入绘图数据
> phe<-read.csv("phe.csv")
# 查看数据,第一列为分组,后四列为四个性状
> head(phe)
group TL SA AD NR
1 weak 24.12 6.67 0.804 20.23
2 weak 24.12 6.67 0.804 20.23
3 weak 24.12 6.67 0.804 20.23
4 strong 43.31 12.96 0.875 25.23
5 strong 43.31 12.96 0.875 25.23
6 strong 43.31 12.96 0.875 25.23
# 载入ggplot2 R包
> library(ggplot2)
# 绘图
> ggplot(phe,aes(x=group,y=TL,fill=group))+geom_boxplot()
3.显著性差异分析
# 载入显著性分析R包ggpubr
> library(ggpubr)
# 绘图
> ggplot(phe,aes(x=group,y=TL,fill=group)) +
geom_boxplot()+stat_compare_means(method = "t.test")
想把图上显示的P值换为显著性星号,在stat_compare_means()中加入label="p.signif"
> ggplot(phe,aes(x=group,y=TL,fill=group)) +
geom_boxplot() + stat_compare_means(method = "t.test", label="p.signif" )
4. 添加显著性线段,ggsignif
- comparisons = list(c("strong","weak")),比较strong和weak两组,如果这里有多组,也可以同时设置两两比较,如comparisons = list(c("A","B"), c("A","C"), c("B","C"))
- map_signif_level = TRUE ,TRUE显示显著性星号,FALSE显示P值的具体数值
- test = t.test 差异检验方式
- y_position = c(80,30), 差异线的y轴值
- tip_length = c(0.05,0.4)),差异线臂的长度,左边柱子高,我设置了0.05,右边柱子矮设置了0.4
# 用到ggsignif这个包
> library(ggsignif)
> ggplot(phe,aes(x=group,y=TL,fill=group)) + geom_boxplot() +
geom_signif(comparisons = list(c("strong","weak")),
map_signif_level = TRUE, test = t.test, y_position = c(80,30),
tip_length = c(0.05,0.4))
5.美化
- 去掉画布背景颜色:theme_bw()
- 更改颜色:scale_fill_manual(values = c("#DE6757","#5B9BD5"))
- 去掉画布网格:theme(panel.grid.major = element_blank(),panel.grid.minor = element_blank())
- 坐标轴字号:theme(axis.title.x =element_text(size=14,face = "bold"), axis.title.y=element_text(size=14,face = "bold"),axis.text = element_text(size = 14,face = "bold"))
- 横纵坐标轴标注:labs(x="Group", y="TL value")
> ggplot(phe,aes(x=group,y=TL,fill=group)) +
geom_boxplot()+ geom_signif(comparisons = list(c("strong","weak")),map_signif_level = TRUE,test = t.test,y_position = c(80,30),tip_length = c(0.05,0.4))+ theme_bw()+
scale_fill_manual(values = c("#DE6757","#5B9BD5"))+
theme(panel.grid.major = element_blank(),panel.grid.minor = element_blank())+ labs(x="Group", y="TL value")+
theme(axis.title.x =element_text(size=12,face = "bold"), axis.title.y=element_text(size=12,face = "bold"),axis.text = element_text(size = 12,face = "bold"))
6.合并出图
用到ggpubr包中的ggarrange()函数
> library(ggpubr)
> a<-ggplot(phe,aes(x=group,y=TL,fill=group)) +geom_boxplot()+geom_signif(comparisons = list(c("strong","weak")),map_signif_level = TRUE,test = t.test,y_position = c(80,30),tip_length = c(0.05,0.4))+theme_bw()+ scale_fill_manual(values = c("#DE6757","#5B9BD5"))+theme(panel.grid.major = element_blank(),panel.grid.minor = element_blank())+labs(x="Group", y="TL value")+theme(axis.title.x =element_text(size=12,face = "bold"), axis.title.y=element_text(size=12,face = "bold"),axis.text = element_text(size = 12,face = "bold"))
> b<-ggplot(phe,aes(x=group,y=SA,fill=group)) +geom_boxplot()+geom_signif(comparisons = list(c("strong","weak")),map_signif_level = TRUE,test = t.test,y_position = c(17,5),tip_length = c(0.05,0.4))+theme_bw()+ scale_fill_manual(values = c("#DE6757","#5B9BD5"))+theme(panel.grid.major = element_blank(),panel.grid.minor = element_blank())+labs(x="Group", y="SA value")+theme(axis.title.x =element_text(size=12,face = "bold"), axis.title.y=element_text(size=12,face = "bold"),axis.text = element_text(size = 12,face = "bold"))
> c<-ggplot(phe,aes(x=group,y=AD,fill=group)) +geom_boxplot()+geom_signif(comparisons = list(c("strong","weak")),map_signif_level = TRUE,test = t.test,y_position = c(0.9,0),tip_length = c(0.05,0.05))+theme_bw()+ scale_fill_manual(values = c("#DE6757","#5B9BD5"))+theme(panel.grid.major = element_blank(),panel.grid.minor = element_blank())+labs(x="Group", y="AD value")+theme(axis.title.x =element_text(size=12,face = "bold"), axis.title.y=element_text(size=12,face = "bold"),axis.text = element_text(size = 12,face = "bold"))
> d<-ggplot(phe,aes(x=group,y=NR,fill=group)) +geom_boxplot()+geom_signif(comparisons = list(c("strong","weak")),map_signif_level = TRUE,test = t.test,y_position = c(40,0),tip_length = c(0.05,0.4))+theme_bw()+ scale_fill_manual(values = c("#DE6757","#5B9BD5"))+theme(panel.grid.major = element_blank(),panel.grid.minor = element_blank())+labs(x="Group", y="NR value")+theme(axis.title.x =element_text(size=12,face = "bold"), axis.title.y=element_text(size=12,face = "bold"),axis.text = element_text(size = 12,face = "bold"))
> pdf("phe.pdf",width = 8,height = 8)
> ggarrange(a,b,c,d + rremove("x.text"), ncol = 2, nrow = 2)
> dev.off()