1.密度图 ggdensity
1.1 用法
# Density plot: ggdensity() usage signature with the listed default values
ggdensity(
# Data: a data frame
data,
# x-axis: the variable to plot
x,
# y-axis; the value must be given as a quoted string
y = "..density..",
# Logical value. Default is FALSE. Used only when y is a vector containing multiple variables to plot.
# If TRUE, create a multi-panel plot by combining the plots of the y variables
combine = FALSE,
# Logical or character value. Default is FALSE. Used only when y is a vector containing multiple variables to plot.
# If TRUE, merge multiple y variables in the same plotting area. Allowed values also include "asis" (TRUE) and "flip". If merge = "flip", the y variables are used as x tick labels and the x variable is used as the grouping variable.
merge = FALSE,
# Density line color and fill color
color = "black",
fill = NA,
# Color palette used for coloring or filling by groups
palette = NULL,
# Changes the size of points and outlines
size = NULL,
linetype = "solid",
# Transparency
alpha = 0.5,
# Title and axis labels
title = NULL,
xlab = NULL,
ylab = NULL,
# Grouping variable(s) used to split the plot into multiple panels
facet.by = NULL,
panel.labs = NULL,
short.panel.labs = TRUE,
# One of "none", "mean" or "median"; "mean" / "median" add a mean or median line, respectively
add = c("none", "mean", "median"),
add.params = list(linetype = "dashed"),
# Logical value; if TRUE, add a marginal rug.
rug = FALSE,
label = NULL,
# Font of the labels: a list that can contain a combination of the following elements:
# size (e.g. 14), style (e.g. "plain", "bold", "italic", "bold.italic")
# and color (e.g. "red"). For example font.label = list(size = 14, face = "bold", color = "red").
# To specify only the size and the style, use font.label = list(size = 14, face = "plain")
font.label = list(size = 11, color = "black"),
label.select = NULL,
# Logical value: whether to use ggrepel to avoid overplotting text labels
repel = FALSE,
label.rectangle = FALSE,
ggtheme = theme_pubr(),
...
)
1.2 自定义参数ggpar()
使用函数ggpar()可以轻松自定义绘图(详见 ?ggpar),可更改的内容包括:
- 主标题和轴标签:main,xlab,ylab
- 轴区间:xlim,ylim(例如:ylim = c(0,30))
- 轴比例尺:xscale,yscale(例如:yscale ="log2")
- 调色板:palette = "Dark2" 或 palette = c("gray", "blue", "red")
- 图例标题,标签和位置:legend="right"
- 绘图方向:orientation = c("vertical", "horizontal", "reverse")
1.3 实例
# ggdensity() examples.
# NOTE: these examples need the ggpubr package attached first, e.g. library(ggpubr).

# Create some example data: 200 "F" and 200 "M" observations with a
# normally distributed weight (means 55 and 58); the seed makes it reproducible.
set.seed(1234)
wdata <- data.frame(
  sex = factor(rep(c("F", "M"), each = 200)),
  weight = c(rnorm(200, 55), rnorm(200, 58))
)
head(wdata, 4)
#>   sex   weight
#> 1   F 53.79293
#> 2   F 55.27743
#> 3   F 56.08444
#> 4   F 52.65430

# Basic density plot
# Add mean line and marginal rug
p1 <- ggdensity(wdata, x = "weight", fill = "lightgray",
                add = "mean", rug = TRUE)
p1
#> Warning: geom_vline(): Ignoring `mapping` because `xintercept` was provided.
#> Warning: geom_vline(): Ignoring `data` because `xintercept` was provided.

# Change outline colors by groups ("sex")
# Use custom palette
p2 <- ggdensity(wdata, x = "weight",
                add = "mean", rug = TRUE,
                color = "sex", palette = c("#00AFBB", "#E7B800"))
p2

# Change outline and fill colors by groups ("sex")
# Use custom palette
p3 <- ggdensity(wdata, x = "weight",
                add = "mean", rug = TRUE,
                color = "sex", fill = "sex",
                palette = c("#00AFBB", "#E7B800"))
p3

# Same as p3, but additionally split into one panel per group via facet.by
p4 <- ggdensity(wdata, x = "weight",
                add = "mean", rug = TRUE,
                color = "sex", fill = "sex",
                palette = c("#00AFBB", "#E7B800"),
                facet.by = "sex")
p4
2. 叠加正态密度曲线 stat_overlay_normal_density
用于直观地检查数据分布偏离正态分布的程度
2.1 用法
# stat_overlay_normal_density() usage signature with the listed default values
stat_overlay_normal_density(
# Set of aesthetic mappings created by aes() or aes_(). If specified and inherit.aes = TRUE (the default), it is combined with the default mapping at the top level of the plot. You must supply mapping if there is no plot mapping.
mapping = NULL,
data = NULL,
geom = "line",
position = "identity",
na.rm = FALSE,
show.legend = NA,
inherit.aes = TRUE,
...
)
2.2 举例
# stat_overlay_normal_density() examples.
# NOTE: these examples need the ggpubr package attached first, e.g. library(ggpubr).

# Simple density plot of mpg, built up layer by layer:
# fixed x-axis limits, then a dashed red overlay.
data("mtcars")
p1 <- ggdensity(mtcars, x = "mpg", fill = "red")
p1 <- p1 + scale_x_continuous(limits = c(-1, 50))
p1 <- p1 + stat_overlay_normal_density(color = "red", linetype = "dashed")
p1

# Color by groups: the overlay is colored by Species as well
data(iris)
p2 <- ggdensity(iris, x = "Sepal.Length", color = "Species")
p2 <- p2 + stat_overlay_normal_density(aes(color = Species), linetype = "dashed")
p2

# Facet: one panel per Species, each with a dashed red overlay
p3 <- ggdensity(iris, x = "Sepal.Length", facet.by = "Species")
p3 <- p3 + stat_overlay_normal_density(color = "red", linetype = "dashed")
p3
3.经验累积密度函数 Empirical cumulative density function
样本分布函数(sample distribution function)亦称经验分布函数,是统计学中的基本概念之一。样本分布函数Fn(x)具有分布函数的性质,可以将其看成是以等概率1/n取值X1,X2,…,Xn的离散型随机变量的分布函数。其图形是一条呈跳跃式上升的阶梯形折线:如果观测值不重复,则每次跳跃的高度为1/n;如果有重复,则按1/n的倍数跳跃上升。
3.1 用法
# ggecdf() usage signature; the values after `=` are the listed defaults.
# `data` and `x` have no default and must be supplied.
ggecdf(
data,
x,
combine = FALSE,
merge = FALSE,
color = "black",
palette = NULL,
size = NULL,
linetype = "solid",
title = NULL,
xlab = NULL,
ylab = NULL,
facet.by = NULL,
panel.labs = NULL,
short.panel.labs = TRUE,
ggtheme = theme_pubr(),
...
)
3.2 举例
# ggecdf() examples.
# NOTE: these examples need the ggpubr package attached first, e.g. library(ggpubr).

# Create some example data: 200 "F" and 200 "M" observations with a
# normally distributed weight (means 55 and 58); the seed makes it reproducible.
set.seed(1234)
wdata <- data.frame(
  sex = factor(rep(c("F", "M"), each = 200)),
  weight = c(rnorm(200, 55), rnorm(200, 58))
)
head(wdata, 4)
#>   sex   weight
#> 1   F 53.79293
#> 2   F 55.27743
#> 3   F 56.08444
#> 4   F 52.65430

# Basic ECDF plot
p1 <- ggecdf(wdata, x = "weight")
p1

# Change colors and linetype by groups ("sex")
# Use custom palette
p2 <- ggecdf(wdata, x = "weight",
             color = "sex", linetype = "sex",
             palette = c("#00AFBB", "#E7B800"))
p2
4.直方图 Histogram plot
4.1 用法
# gghistogram() usage signature with the listed default values
gghistogram(
# Data: a data frame
data,
# x-axis: the variable to plot
x,
# y-axis; the value must be given as a quoted string
y = "..count..",
# Logical value. Default is FALSE. Used only when y is a vector containing multiple variables to plot.
# If TRUE, create a multi-panel plot by combining the plots of the y variables
combine = FALSE,
# Logical or character value. Default is FALSE. Used only when y is a vector containing multiple variables to plot.
# If TRUE, merge multiple y variables in the same plotting area. Allowed values also include "asis" (TRUE) and "flip". If merge = "flip", the y variables are used as x tick labels and the x variable is used as the grouping variable.
merge = FALSE,
weight = NULL,
color = "black",
fill = NA,
# Color palette used for coloring or filling by groups
palette = NULL,
size = NULL,
linetype = "solid",
alpha = 0.5,
# Number of bins; defaults to 30.
bins = NULL,
# Numeric value specifying the bin width. Use a value between 0 and 1 when you have a strongly dense dotplot, e.g. binwidth = 0.2.
binwidth = NULL,
title = NULL,
xlab = NULL,
ylab = NULL,
# Character vector of length 1 or 2 specifying the grouping variables used to split the plot into multiple panels. Should be in the data.
facet.by = NULL,
# List of one or two character vectors used to modify the facet panel labels. For example, panel.labs = list(sex = c("Male", "Female")) specifies the labels for the "sex" variable. For two grouping variables you can use e.g. panel.labs = list(sex = c("Male", "Female"), rx = c("Obs", "Lev", "Lev2")).
panel.labs = NULL,
short.panel.labs = TRUE,
add = c("none", "mean", "median"),
# Parameters (color, size, linetype) for the argument 'add'; e.g. add.params = list(color = "red").
add.params = list(linetype = "dashed"),
rug = FALSE,
add_density = FALSE,
label = NULL,
font.label = list(size = 11, color = "black"),
label.select = NULL,
repel = FALSE,
label.rectangle = FALSE,
position = position_identity(),
ggtheme = theme_pubr(),
...
)
4.2 自定义参数ggpar()
使用函数ggpar()可以轻松自定义绘图, 阅读?ggpar进行更改:
- 主标题和轴标签:main,xlab,ylab
- 轴区间:xlim,ylim(例如:ylim = c(0,30))
- 轴比例尺:xscale,yscale(例如:yscale ="log2")
- 调色板:palette = "Dark2" 或 palette = c("gray", "blue", "red")
- 图例标题,标签和位置:legend="right"
- 绘图方向:orientation = c("vertical", "horizontal", "reverse")
4.3 实例
# gghistogram() examples.
# NOTE: these examples need the ggpubr package attached first, e.g. library(ggpubr).

# Create some example data: 200 "F" and 200 "M" observations with a
# normally distributed weight (means 55 and 58); the seed makes it reproducible.
set.seed(1234)
wdata <- data.frame(
  sex = factor(rep(c("F", "M"), each = 200)),
  weight = c(rnorm(200, 55), rnorm(200, 58))
)
head(wdata, 4)
#>   sex   weight
#> 1   F 53.79293
#> 2   F 55.27743
#> 3   F 56.08444
#> 4   F 52.65430

# Basic histogram
# Add mean line and marginal rug
p1 <- gghistogram(wdata, x = "weight", fill = "lightgray",
                  add = "mean", rug = TRUE)
p1
#> Warning: Using `bins = 30` by default. Pick better value with the argument `bins`.
#> Warning: geom_vline(): Ignoring `mapping` because `xintercept` was provided.
#> Warning: geom_vline(): Ignoring `data` because `xintercept` was provided.

# Change outline colors by groups ("sex")
# Use custom color palette
p2 <- gghistogram(wdata, x = "weight",
                  add = "mean", rug = TRUE,
                  color = "sex", palette = c("#00AFBB", "#E7B800"))
p2
#> Warning: Using `bins = 30` by default. Pick better value with the argument `bins`.

# Change outline and fill colors by groups ("sex")
# Use custom color palette
p3 <- gghistogram(wdata, x = "weight",
                  add = "mean", rug = TRUE,
                  color = "sex", fill = "sex",
                  palette = c("#00AFBB", "#E7B800"))
p3
#> Warning: Using `bins = 30` by default. Pick better value with the argument `bins`.

# Combine histogram and density plots
p4 <- gghistogram(wdata, x = "weight",
                  add = "mean", rug = TRUE,
                  fill = "sex", palette = c("#00AFBB", "#E7B800"),
                  add_density = TRUE)
p4
#> Warning: Using `bins = 30` by default. Pick better value with the argument `bins`.

# Weighted histogram
p5 <- gghistogram(iris, x = "Sepal.Length", weight = "Petal.Length")
p5
#> Warning: Using `bins = 30` by default. Pick better value with the argument `bins`.
5.分位数图示法 Quantile Quantile Plot
统计学中的Q-Q图(Q代表分位数,quantile)是一种概率图,通过把两个概率分布的分位数放在一起,以图形的方式比较这两个分布。首先选定一组分位数间隔;图上的每个点(x,y)对应同一个分位数:y坐标是第二个分布的该分位数,x坐标是第一个分布的相同分位数。因此,这些点构成一条以分位数间隔为参数的曲线。如果两个分布相似,则Q-Q图上的点趋近于落在y=x线上;如果两个分布线性相关,则点趋近于落在一条直线上,但不一定是y=x线。Q-Q图也可用于直观地评估位置-尺度分布族中的参数。
从定义中可以看出,Q-Q图主要用于检验数据分布的相似性。如果要利用Q-Q图对数据进行正态性检验,可以令x轴为正态分布的分位数,y轴为样本分位数;如果这些点大致分布在一条直线上,就说明样本分位数与正态分布分位数线性相关,即样本数据近似服从正态分布。
5.1 用法
# ggqqplot() usage signature; the values after `=` are the listed defaults.
# `data` and `x` have no default and must be supplied.
ggqqplot(
data,
x,
combine = FALSE,
merge = FALSE,
color = "black",
palette = NULL,
size = NULL,
shape = NULL,
add = c("qqline", "none"),
add.params = list(linetype = "solid"),
conf.int = TRUE,
conf.int.level = 0.95,
title = NULL,
xlab = NULL,
ylab = NULL,
facet.by = NULL,
panel.labs = NULL,
short.panel.labs = TRUE,
ggtheme = theme_pubr(),
...
)
5.2 自定义参数ggpar()
使用函数ggpar()可以轻松自定义绘图, 阅读?ggpar进行更改:
- 主标题和轴标签:main,xlab,ylab
- 轴区间:xlim,ylim(例如:ylim = c(0,30))
- 轴比例尺:xscale,yscale(例如:yscale ="log2")
- 调色板:palette = "Dark2" 或 palette = c("gray", "blue", "red")
- 图例标题,标签和位置:legend="right"
- 绘图方向:orientation = c("vertical", "horizontal", "reverse")
5.3 实例
# ggqqplot() examples.
# NOTE: these examples need the ggpubr package attached first, e.g. library(ggpubr).

# Create some example data: 200 "F" and 200 "M" observations with a
# normally distributed weight (means 55 and 58); the seed makes it reproducible.
set.seed(1234)
wdata <- data.frame(
  sex = factor(rep(c("F", "M"), each = 200)),
  weight = c(rnorm(200, 55), rnorm(200, 58))
)
head(wdata, 4)
#>   sex   weight
#> 1   F 53.79293
#> 2   F 55.27743
#> 3   F 56.08444
#> 4   F 52.65430

# Basic QQ plot
p1 <- ggqqplot(wdata, x = "weight")
p1

# Change colors and shape by groups ("sex")
# Use custom palette
p2 <- ggqqplot(wdata, x = "weight",
               color = "sex", palette = c("#00AFBB", "#E7B800"))
p2
Reference
https://rpkgs.datanovia.com/ggpubr/reference/index.html
http://www.sthda.com/english/articles/24-ggpubr-publication-ready-plots/