ggpubr002 绘制一个连续变量

1.密度图 ggdensity

1.1 用法

# 密度图 ggdensity
ggdensity(
# 数据，数据框  
  data, 
# 横坐标，绘制的变量
  x,   
# 纵坐标，变量名需加引号；默认"..density.."表示绘制密度
  y = "..density..", 
# 逻辑值。默认值为FALSE。仅当y是包含要绘制的多个变量的向量时使用。
# 如果为TRUE，则通过组合y变量的图来创建多面板图
  combine = FALSE, 
# 逻辑或字符值。默认值为FALSE。仅当y是包含要绘制的多个变量的向量时使用。
# 如果为TRUE，则在同一绘图区域中合并多个y变量。允许的值还包括"asis"（等同于TRUE）和"flip"。如果merge = "flip"，则y变量用作x刻度标签，而x变量用作分组变量。
  merge = FALSE,  
# 密度线颜色和填充颜色
  color = "black",                                      
  fill = NA,
# 用于按组着色或填充的调色板
  palette = NULL,
# 更改点和轮廓的大小
  size = NULL,
  linetype = "solid",
# 透明度
  alpha = 0.5,
# 标题及标签
  title = NULL,
  xlab = NULL,
  ylab = NULL,
# 指定用于将图面划分为多个面板的分组变量
  facet.by = NULL,
  panel.labs = NULL,
  short.panel.labs = TRUE,
# 允许的值为"none"、"mean"或"median"之一（"mean"和"median"分别用于添加均值线和中位数线）
  add = c("none", "mean", "median"),
  add.params = list(linetype = "dashed"),
# 逻辑值，如果为TRUE，则添加边缘地毯（rug）。
  rug = FALSE,
  label = NULL,
# 标签字体，一个可以包含以下元素的组合的列表：
# 大小（例如：14），样式（例如："plain"，"bold"，"italic"，"bold.italic"）
# 颜色（例如："red"）。例如font.label = list(size = 14, face = "bold", color = "red")。
# 若要仅指定大小和样式，请使用font.label = list(size = 14, face = "plain")
  font.label = list(size = 11, color = "black"),
  label.select = NULL,
# 是否使用ggrepel避免过度绘制文本标签的逻辑值
  repel = FALSE,
  label.rectangle = FALSE,
  ggtheme = theme_pubr(),
  ...
)

1.2 自定义参数ggpar()

使用函数ggpar()可以轻松自定义绘图, 阅读?ggpar进行更改：

主标题和轴标签：main，xlab，ylab
轴区间：xlim，ylim(例如：ylim = c(0, 30))
轴比例尺：xscale，yscale(例如：yscale = "log2")
调色板：palette = "Dark2"或palette = c("gray", "blue", "red")
图例标题，标签和位置：legend="right"
绘图方向：orientation = c("vertical", "horizontal", "reverse")

1.3 实例

# Load ggpubr, which provides ggdensity() and the other plotting helpers
# used throughout this tutorial
library(ggpubr)

# Create some example data: 200 female and 200 male weights
set.seed(1234)
wdata <- data.frame(
  sex = factor(rep(c("F", "M"), each = 200)),
  weight = c(rnorm(200, 55), rnorm(200, 58))
)

head(wdata, 4)
#>   sex   weight
#> 1   F 53.79293
#> 2   F 55.27743
#> 3   F 56.08444
#> 4   F 52.65430

# Basic density plot
# Add mean line and marginal rug
p1 <- ggdensity(wdata, x = "weight", fill = "lightgray",
                add = "mean", rug = TRUE)
p1
#> Warning: geom_vline(): Ignoring `mapping` because `xintercept` was provided.
#> Warning: geom_vline(): Ignoring `data` because `xintercept` was provided.

# Change outline colors by groups ("sex")
# Use custom palette
p2 <- ggdensity(wdata, x = "weight",
                add = "mean", rug = TRUE,
                color = "sex", palette = c("#00AFBB", "#E7B800"))
p2

# Change outline and fill colors by groups ("sex")
# Use custom palette
p3 <- ggdensity(wdata, x = "weight",
                add = "mean", rug = TRUE,
                color = "sex", fill = "sex",
                palette = c("#00AFBB", "#E7B800"))
p3

# Same as p3, but split into one panel per group with facet.by
p4 <- ggdensity(wdata, x = "weight",
                add = "mean", rug = TRUE,
                color = "sex", fill = "sex",
                palette = c("#00AFBB", "#E7B800"),
                facet.by = "sex")
p4

2. 叠加正态密度曲线 stat_overlay_normal_density

直观地检查数据分布偏离正态分布的程度

2.1 用法

stat_overlay_normal_density(
# 由aes()或aes_()创建的美学映射集。如果已指定且inherit.aes = TRUE（默认值），它将与绘图顶层的默认映射合并。如果绘图本身没有映射，则必须在此提供映射。
mapping = NULL,
  data = NULL,
  geom = "line",
  position = "identity",
  na.rm = FALSE,
  show.legend = NA,
  inherit.aes = TRUE,
  ...
)

2.2 举例

# Simple density plot of mpg with a dashed normal curve drawn on top
data("mtcars")
p1 <- ggdensity(
  mtcars,
  x = "mpg",
  fill = "red"
) +
  scale_x_continuous(limits = c(-1, 50)) +
  stat_overlay_normal_density(color = "red", linetype = "dashed")
p1

# One density per species, each with a normal curve in the matching color
data("iris")
p2 <- ggdensity(
  iris,
  x = "Sepal.Length",
  color = "Species"
) +
  stat_overlay_normal_density(aes(color = Species), linetype = "dashed")
p2

# One panel per species, each with a red dashed normal curve
p3 <- ggdensity(
  iris,
  x = "Sepal.Length",
  facet.by = "Species"
) +
  stat_overlay_normal_density(color = "red", linetype = "dashed")
p3

3.经验累积密度函数 Empirical cumulative density function

样本分布函数(sample distribution function)亦称经验分布函数，统计学中的基本概念之一。样本分布函数Fn(x)具有分布函数的性质，我们可以将其看成是以等概率1/n 取值X1，X2，…，Xn的离散型随机变量的分布函数，且该函数的图形呈跳跃式一条台阶形折线，如观测值不重复，则每一跳跃为1/n ，如有重复，则按1/n的倍数跳跃上升。

3.1 用法

ggecdf(
  data,
  x,
  combine = FALSE,
  merge = FALSE,
  color = "black",
  palette = NULL,
  size = NULL,
  linetype = "solid",
  title = NULL,
  xlab = NULL,
  ylab = NULL,
  facet.by = NULL,
  panel.labs = NULL,
  short.panel.labs = TRUE,
  ggtheme = theme_pubr(),
  ...
)

3.2 举例

# Create some example data: 200 female and 200 male weights
set.seed(1234)
wdata <- data.frame(
  sex = factor(rep(c("F", "M"), each = 200)),
  weight = c(rnorm(200, 55), rnorm(200, 58))
)

head(wdata, 4)
#>   sex   weight
#> 1   F 53.79293
#> 2   F 55.27743
#> 3   F 56.08444
#> 4   F 52.65430

# Basic ECDF plot
p1 <- ggecdf(wdata, x = "weight")
p1

# Change colors and linetype by groups ("sex")
# Use custom palette
p2 <- ggecdf(wdata, x = "weight",
             color = "sex", linetype = "sex",
             palette = c("#00AFBB", "#E7B800"))
p2

4.直方图 Histogram plot

4.1 用法

gghistogram(
# 数据，数据框  
  data, 
# 横坐标，绘制的变量
  x,   
# 纵坐标，变量名需加引号；默认"..count.."表示绘制计数
  y = "..count..",
# 逻辑值。默认值为FALSE。仅当y是包含要绘制的多个变量的向量时使用。
# 如果为TRUE，则通过组合y变量的图来创建多面板图
  combine = FALSE,
# 逻辑或字符值。默认值为FALSE。仅当y是包含要绘制的多个变量的向量时使用。
# 如果为TRUE，则在同一绘图区域中合并多个y变量。允许的值还包括"asis"（等同于TRUE）和"flip"。如果merge = "flip"，则y变量用作x刻度标签，而x变量用作分组变量。
  merge = FALSE,
  weight = NULL,
  color = "black",
  fill = NA,
# 着色板，用于按组着色或填充的调色板
  palette = NULL,
  size = NULL,
  linetype = "solid",
  alpha = 0.5,
# bin数默认为30。
  bins = NULL,
# 指定箱宽的数值。当数据点非常密集时，请使用介于0和1之间的值。例如binwidth = 0.2。
  binwidth = NULL,
  title = NULL,
  xlab = NULL,
  ylab = NULL,
# 长度为1或2的字符向量，指定用于将图面划分为多个面板的分组变量。这些变量应存在于数据中。
  facet.by = NULL,
# 用于修改分面面板标签的列表，包含一个或两个字符向量。例如，panel.labs = list(sex = c("Male", "Female"))指定"sex"变量的标签。对于两个分组变量，可以使用例如panel.labs = list(sex = c("Male", "Female"), rx = c("Obs", "Lev", "Lev2"))。
  panel.labs = NULL,
  short.panel.labs = TRUE,
  add = c("none", "mean", "median"),
# 参数'add'的参数（color，size，linetype）；例如：add.params = list(color = "red")。
  add.params = list(linetype = "dashed"),
  rug = FALSE,
  add_density = FALSE,
  label = NULL,
  font.label = list(size = 11, color = "black"),
  label.select = NULL,
  repel = FALSE,
  label.rectangle = FALSE,
  position = position_identity(),
  ggtheme = theme_pubr(),
  ...
)

4.2 自定义参数ggpar()

使用函数ggpar()可以轻松自定义绘图, 阅读?ggpar进行更改：

主标题和轴标签：main，xlab，ylab
轴区间：xlim，ylim(例如：ylim = c(0, 30))
轴比例尺：xscale，yscale(例如：yscale = "log2")
调色板：palette = "Dark2"或palette = c("gray", "blue", "red")
图例标题，标签和位置：legend="right"
绘图方向：orientation = c("vertical", "horizontal", "reverse")

4.3 实例

# Create some example data: 200 female and 200 male weights
set.seed(1234)
wdata <- data.frame(
  sex = factor(rep(c("F", "M"), each = 200)),
  weight = c(rnorm(200, 55), rnorm(200, 58))
)

head(wdata, 4)
#>   sex   weight
#> 1   F 53.79293
#> 2   F 55.27743
#> 3   F 56.08444
#> 4   F 52.65430

# Basic histogram plot
# Add mean line and marginal rug
p1 <- gghistogram(wdata, x = "weight", fill = "lightgray",
                  add = "mean", rug = TRUE)
p1
#> Warning: Using `bins = 30` by default. Pick better value with the argument `bins`.
#> Warning: geom_vline(): Ignoring `mapping` because `xintercept` was provided.
#> Warning: geom_vline(): Ignoring `data` because `xintercept` was provided.

# Change outline colors by groups ("sex")
# Use custom color palette
p2 <- gghistogram(wdata, x = "weight",
                  add = "mean", rug = TRUE,
                  color = "sex", palette = c("#00AFBB", "#E7B800"))
p2
#> Warning: Using `bins = 30` by default. Pick better value with the argument `bins`.

# Change outline and fill colors by groups ("sex")
# Use custom color palette
p3 <- gghistogram(wdata, x = "weight",
                  add = "mean", rug = TRUE,
                  color = "sex", fill = "sex",
                  palette = c("#00AFBB", "#E7B800"))
p3
#> Warning: Using `bins = 30` by default. Pick better value with the argument `bins`.

# Combine histogram and density plots
p4 <- gghistogram(wdata, x = "weight",
                  add = "mean", rug = TRUE,
                  fill = "sex", palette = c("#00AFBB", "#E7B800"),
                  add_density = TRUE)
p4
#> Warning: Using `bins = 30` by default. Pick better value with the argument `bins`.

# Weighted histogram
p5 <- gghistogram(iris, x = "Sepal.Length", weight = "Petal.Length")
p5
#> Warning: Using `bins = 30` by default. Pick better value with the argument `bins`.

5.分位数图示法 Quantile Quantile Plot

统计学里Q-Q图（Q代表分位数）是一个概率图，用图形的方式比较两个概率分布，把它们的分位数放在一起比较。首先选好分位数间隔。图上的点（x,y）表示第二个分布（y坐标）的某个分位数和与之对应的第一个分布（x坐标）的相同分位数。因此，这条线是一条以分位数间隔为参数的曲线。如果两个分布相似，则该Q-Q图趋近于落在y=x线上。如果两分布线性相关，则点在Q-Q图上趋近于落在一条直线上，但不一定在y=x线上。Q-Q图还可以用于直观地评估位置-尺度分布族中的参数。
从定义中可以看出Q-Q图主要用于检验数据分布的相似性，如果要利用Q-Q图来对数据进行正态分布的检验，则可以令x轴为正态分布的分位数，y轴为样本分位数，如果这两者构成的点分布在一条直线附近，就说明样本分位数与正态分布分位数之间存在线性关系，即样本数据近似服从正态分布。

5.1 用法

ggqqplot(
  data,
  x,
  combine = FALSE,
  merge = FALSE,
  color = "black",
  palette = NULL,
  size = NULL,
  shape = NULL,
  add = c("qqline", "none"),
  add.params = list(linetype = "solid"),
  conf.int = TRUE,
  conf.int.level = 0.95,
  title = NULL,
  xlab = NULL,
  ylab = NULL,
  facet.by = NULL,
  panel.labs = NULL,
  short.panel.labs = TRUE,
  ggtheme = theme_pubr(),
  ...
)

5.2 自定义参数ggpar()

使用函数ggpar()可以轻松自定义绘图, 阅读?ggpar进行更改：

主标题和轴标签：main，xlab，ylab
轴区间：xlim，ylim(例如：ylim = c(0, 30))
轴比例尺：xscale，yscale(例如：yscale = "log2")
调色板：palette = "Dark2"或palette = c("gray", "blue", "red")
图例标题，标签和位置：legend="right"
绘图方向：orientation = c("vertical", "horizontal", "reverse")

5.3 实例

# Create some example data: 200 female and 200 male weights
set.seed(1234)
wdata <- data.frame(
  sex = factor(rep(c("F", "M"), each = 200)),
  weight = c(rnorm(200, 55), rnorm(200, 58))
)

head(wdata, 4)
#>   sex   weight
#> 1   F 53.79293
#> 2   F 55.27743
#> 3   F 56.08444
#> 4   F 52.65430

# Basic QQ plot
p1 <- ggqqplot(wdata, x = "weight")
p1

# Change colors by groups ("sex"); point shape is left at its default
# because only `color` is mapped to the grouping variable here
# Use custom palette
p2 <- ggqqplot(wdata, x = "weight",
               color = "sex", palette = c("#00AFBB", "#E7B800"))
p2

Reference

https://rpkgs.datanovia.com/ggpubr/reference/index.html
http://www.sthda.com/english/articles/24-ggpubr-publication-ready-plots/