本文使用基因表达数据绘制箱式图,并叠加小提琴图和点图 (geom_boxplot绘制箱式图,geom_violin绘制小提琴图,geom_dotplot和geom_jitter绘制点图).
了解一下R语言中箱式图的术语,以及它的含义:
导入数据:
# Import the tab-separated expression table (FPKM values, one column per
# sample) that has been copied to the system clipboard. The first row is used
# as the column names.
genefpkm <- read.csv(file = "clipboard", header = TRUE, sep = "\t")
head(genefpkm)

# Work on a matrix copy so the raw import stays available if a later step
# goes wrong and nothing has to be re-imported.
x_d <- as.matrix(genefpkm)

# Force numeric storage (dim and dimnames are kept), then take log10 of every
# value to compress the dynamic range. An expression value of 0 becomes -Inf
# here; the plotting layers drop such non-finite values.
storage.mode(x_d) <- "double"
x_d <- log10(x_d)

# Group labels in long format. The layout is fixed: the first four columns are
# LPE samples and the last four are LPF samples, so fail early with a clear
# message if the table does not have exactly eight sample columns.
if (ncol(x_d) != 8L) {
  stop("expected 8 sample columns (4 LPE + 4 LPF), got ", ncol(x_d),
       call. = FALSE)
}
group <- rep(c("LPE", "LPF"), each = 4L * nrow(x_d))

# Long-format table: c(x_d) unrolls the matrix column by column, so every
# sample name is repeated once per row (gene) to stay aligned with it.
data <- data.frame(
  expression = c(x_d),
  sample = rep(colnames(x_d), each = nrow(x_d)),
  group = group
)
开始画图:
# Notched box plot of log10(FPKM) per sample, filled by group.
ggplot(data, mapping = aes(x = sample, y = expression, fill = group)) +
  # Solid error bars (whiskers with end caps), drawn before the boxes.
  stat_boxplot(geom = "errorbar", na.rm = TRUE, width = 0.3, size = 1) +
  # Dashed boxes. notch = TRUE cuts a notch at the median line; whether the
  # notches of two boxes overlap indicates whether their medians differ.
  # See vignette("ggplot2-specs") for the available linetype values.
  geom_boxplot(
    notch = TRUE,
    linetype = "dashed",
    outlier.size = 3,
    outlier.alpha = 0.3,
    na.rm = TRUE
  ) +
  # Manually chosen fill colours, one per group.
  scale_fill_manual(values = c("darkolivegreen1", "deeppink")) +
  labs(x = "Samples", y = "log10(FPKM)") +
  theme(
    panel.background = element_blank(),
    axis.line = element_line(size = rel(1.5)),
    axis.title = element_text(size = rel(1.5)),
    axis.text = element_text(size = rel(1.2))
  )
此时,中间的箱子是虚线,两端的须线是实线(其实箱式图本身全是虚线,只是额外绘制的误差线是实线,与两端的虚线重叠后看起来就是实线)
# Notched box plot with dashed whiskers and a solid box outline.
ggplot(data, mapping = aes(x = sample, y = expression, fill = group)) +
  # Dashed error bars (whiskers with end caps).
  stat_boxplot(
    geom = "errorbar",
    linetype = "dashed",
    na.rm = TRUE,
    width = 0.3,
    size = 1
  ) +
  # Fully dashed box plot, including the outlier points.
  geom_boxplot(
    notch = TRUE,
    linetype = "dashed",
    outlier.size = 3,
    outlier.alpha = 0.3,
    na.rm = TRUE
  ) +
  # Solid overlay that draws only the box: ymin/ymax are mapped to the lower
  # and upper hinges, so no whisker extends beyond the box, and the outliers
  # are suppressed because the layer above already drew them.
  stat_boxplot(
    aes(ymin = ..lower.., ymax = ..upper..),
    notch = TRUE,
    outlier.shape = NA,
    alpha = 1,
    size = 1,
    na.rm = TRUE
  ) +
  scale_fill_manual(values = c("darkolivegreen1", "deeppink")) +
  labs(x = "Samples", y = "log10(FPKM)") +
  theme(
    panel.background = element_blank(),
    axis.line = element_line(size = rel(1.5)),
    axis.title = element_text(size = rel(1.5)),
    axis.text = element_text(size = rel(1.2))
  )
此时中间是实线,两端是虚线(其实全都是虚线,只是中间又画了实线的框框,覆盖了虚线)
箱式图叠加小提琴图:
# Violin plot with a narrow notched box plot drawn on top of it.
ggplot(data = data, aes(x = sample, y = expression, fill = group)) +
  # Dashed violin outline showing the distribution of each sample.
  geom_violin(linetype = "dashed", na.rm = TRUE) +
  # Dashed error bars (whiskers with end caps).
  stat_boxplot(
    geom = "errorbar", size = 1, width = 0.3, na.rm = TRUE, linetype = 2
  ) +
  # Dashed box plot; width = 0.3 keeps the box narrower than the violin so the
  # two shapes do not coincide. This layer is the one that draws the outliers.
  geom_boxplot(
    linetype = 2, na.rm = TRUE, outlier.alpha = 0.3, outlier.size = 3,
    notch = TRUE, width = 0.3
  ) +
  # Solid overlay that draws only the box (ymin/ymax mapped to the hinges).
  # outlier.shape = NA stops this layer from drawing the outliers a second
  # time, fully opaque, on top of the semi-transparent ones from the layer
  # above.
  # NOTE(review): recent ggplot2 releases prefer after_stat(lower) over
  # ..lower.. and linewidth over size for lines; the older spelling is kept
  # for compatibility, confirm the installed version before changing it.
  stat_boxplot(
    aes(ymin = ..lower.., ymax = ..upper..),
    size = 1, width = 0.3, notch = TRUE, outlier.shape = NA, na.rm = TRUE
  ) +
  xlab("Samples") + ylab("log10(FPKM)") +
  theme(
    axis.text = element_text(size = rel(1.2)),
    axis.line = element_line(size = rel(1.5)),
    axis.title = element_text(size = rel(1.5)),
    panel.background = element_blank()
  ) +
  scale_fill_manual(values = c("darkolivegreen1", "deeppink"))
去掉误差线和离群点:
# Violin plot with a notched box on top, without error bars or outlier points.
ggplot(data, mapping = aes(x = sample, y = expression, fill = group)) +
  geom_violin(na.rm = TRUE) +
  # Dashed box plot with the outlier points hidden.
  geom_boxplot(
    notch = TRUE,
    outlier.shape = NA,
    linetype = "dashed",
    width = 0.3,
    na.rm = TRUE
  ) +
  # Solid overlay that draws only the box (ymin/ymax mapped to the hinges).
  stat_boxplot(
    aes(ymin = ..lower.., ymax = ..upper..),
    notch = TRUE,
    outlier.shape = NA,
    width = 0.3,
    size = 1,
    na.rm = TRUE
  ) +
  scale_fill_manual(values = c("darkolivegreen1", "deeppink")) +
  labs(x = "Samples", y = "log10(FPKM)") +
  theme(
    panel.background = element_blank(),
    axis.line = element_line(size = rel(1.5)),
    axis.title = element_text(size = rel(1.5)),
    axis.text = element_text(size = rel(1.2))
  )
点图也可以像小提琴图一样展示数据的分布:
# Notched box plot with a centred dot plot of the individual values on top.
ggplot(data, mapping = aes(x = sample, y = expression, fill = group)) +
  # Dashed box plot with the outlier points hidden.
  geom_boxplot(
    notch = TRUE,
    outlier.shape = NA,
    linetype = "dashed",
    width = 0.3,
    na.rm = TRUE
  ) +
  # Solid overlay that draws only the box (ymin/ymax mapped to the hinges).
  stat_boxplot(
    aes(ymin = ..lower.., ymax = ..upper..),
    notch = TRUE,
    outlier.shape = NA,
    width = 0.3,
    size = 1,
    na.rm = TRUE
  ) +
  # There are many points, so the dot size and the stacking ratio are shrunk
  # to make all of them fit.
  geom_dotplot(
    binaxis = "y",
    method = "histodot",
    stackdir = "center",
    stackratio = 0.01,
    dotsize = 0.11,
    na.rm = TRUE
  ) +
  scale_fill_manual(values = c("darkolivegreen1", "deeppink")) +
  labs(x = "Samples", y = "log10(FPKM)") +
  theme(
    panel.background = element_blank(),
    axis.line = element_line(size = rel(1.5)),
    axis.title = element_text(size = rel(1.5)),
    axis.text = element_text(size = rel(1.2))
  )